Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_vendor/pyparsing/helpers.py: 31%
314 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:48 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:48 +0000
1# helpers.py
2import html.entities
3import re
4import typing
6from . import __diag__
7from .core import *
8from .util import _bslash, _flatten, _escape_regex_range_chars
11#
12# global helpers
13#
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example::

        delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    # promote a bare string to the configured literal class
    if isinstance(expr, str_type):
        expr = ParserElement._literalStringClass(expr)

    streamlined = str(expr.copy().streamline())
    trailing = " [{}]".format(str(delim)) if allow_trailing_delim else ""
    list_name = "{} [{} {}]...{}".format(streamlined, str(delim), streamlined, trailing)

    # unless combining, drop the delimiters from the returned tokens
    if not combine:
        delim = Suppress(delim)

    # translate user-facing min/max element counts into repetition bounds
    # for the (delim + expr) tail - the first element is matched separately,
    # so each bound is reduced by one
    if min is not None:
        if min < 1:
            raise ValueError("min must be greater than 0")
        min -= 1
    if max is not None:
        if min is not None and max <= min:
            raise ValueError("max must be greater than, or equal to min")
        max -= 1

    delimited_list_expr = expr + (delim + expr)[min, max]

    if allow_trailing_delim:
        delimited_list_expr += Opt(delim)

    if combine:
        return Combine(delimited_list_expr).set_name(list_name)
    return delimited_list_expr.set_name(list_name)
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults
    """
    # reconcile pre-PEP8 keyword with the new-style argument
    intExpr = intExpr or int_expr
    array_expr = Forward()

    def set_array_contents(s, l, t):
        # the count token drives what the Forward will match next
        nonlocal array_expr
        count = t[0]
        array_expr <<= (expr * count) if count else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try so lookaheads/packrat attempts also bind the count
    intExpr.add_parse_action(set_array_contents, call_during_try=True)
    return (intExpr + array_expr).set_name("(len) " + str(expr) + "...")
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        # rebind the Forward to literally match whatever was just parsed
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten nested results into a sequence of literals
            flattened = _flatten(t.as_list())
            rep << And(Literal(tt) for tt in flattened)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # parse with a copy of the original expression, then verify the
    # resulting tokens against those captured from the first occurrence
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fix: add missing space between "found" and the token list
                raise ParseException(
                    s, l, "Expected {}, found {}".format(matchTokens, theseTokens)
                )

        # replace (not add) the verification action each time the source
        # expression matches, so rep always checks against the latest tokens
        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # reconcile pre-PEP8 synonym keywords with the new-style arguments
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a string passed as `caseless` usually means the caller passed several
    # choice strings positionally instead of one space-delimited string
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # equality / prefix-masking predicates and element class, chosen once
    # up front depending on caselessness and keyword-ness
    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # duplicate symbol - drop the later occurrence
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    # `cur` is a prefix of `other`; move the longer symbol
                    # ahead so it is tried first
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = "[{}]".format(
                    "".join(_escape_regex_range_chars(sym) for sym in symbols)
                )
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = r"\b(?:{})\b".format(patt)

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())
    """
    # each key/value pair is grouped so Dict can treat token[0] as the key
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + SkipTo(closer) + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # reconcile pre-PEP8 keyword with the new-style argument
    asString = asString and as_string

    # bracket the expression with location markers that record the start
    # and end offsets of the match in the input string
    loc_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = loc_marker.copy()
    end_marker.callPreparse = False
    matchExpr = loc_marker("_original_start") + expr + end_marker("_original_end")

    if asString:

        def extractText(s, l, t):
            # return just the matched slice of the input
            return s[t._original_start : t._original_end]

    else:

        def extractText(s, l, t):
            # replace tokens with the matched slice, keeping named results;
            # pop() removes the internal marker names from the results
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """

    def _first_token(t):
        # peel off the single wrapping group
        return t[0]

    return TokenConverter(expr).add_parse_action(_first_token)
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the Located class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width marker that records the current parse location
    marker = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the trailing marker must not skip whitespace, so it reports the
    # position immediately after the matched value
    end_marker = marker.copy().leaveWhitespace()
    return Group(marker("locn_start") + expr("value") + end_marker("locn_end"))
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:
    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(delimited_list(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)
    """
    # reconcile pre-PEP8 `ignoreExpr` with `ignore_expr`: if they differ,
    # prefer the new-style argument unless the caller explicitly passed a
    # non-default value for the old-style one
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression: whitespace-delimited runs of
        # characters that are not delimiters (and not ignored text)
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters: exclude them via CharsNotIn
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-char delimiters: use negative lookahead on the
                # delimiter literals, scanning one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive grammar: each nesting level is a Group of delimiters
    # enclosing ignored text, deeper nestings, or plain content
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    # accept either a tag name string or a pre-built expression; in HTML
    # mode (xml=False) the tag name matches caselessly
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing (empty) tag; the parse
            # action converts it to a boolean results value
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: attribute values may be quoted or bare (anything up to '>'),
        # attribute names are lowercased, and '=value' may be omitted
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    # expose the tag name and a convenience expression for the tag body
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects the looser HTML rules (caseless names,
    # unquoted attribute values)
    return _makeTags(tag_str, xml=False)
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects the strict XML rules (case-sensitive names,
    # double-quoted attribute values)
    return _makeTags(tag_str, xml=True)
# ready-to-use expressions that match any HTML open/close tag
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map HTML5 entity names (without the trailing ';') to their characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)
def replace_html_entity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # an unknown entity yields None, which drops the token from the results
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
class OpAssoc(Enum):
    """Operator associativity indicators, used in the operator
    specification tuples passed to :class:`infix_notation`."""

    LEFT = 1
    RIGHT = 2
# the operator expression in an infix_notation spec: a single expression or
# string, or (for ternary operators) a pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# a full operator spec tuple: (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as Suppress(lpar). If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as Suppress(rpar). If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    # (fix: original tested `rpar` twice, so a non-Suppress lpar with a
    # Suppress rpar failed to group parenthesized sub-expressions)
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    # build the grammar bottom-up: each precedence level wraps the one below
    for i, operDef in enumerate(op_list):
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = "{}{} term".format(opExpr1, opExpr2)
        else:
            term_name = "{} term".format(opExpr)

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: Forward = Forward().set_name(term_name)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # prefix unary; try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use IndentedBlock class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): `backup_stacks=[]` is a mutable default shared across
    # calls - presumably intentional here so nested indentedBlock
    # expressions share the backup list, but confirm before changing
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore the indent stack after a failed match
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # succeed only when the current column matches the current level
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # a sub-block must be indented deeper than the current level
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # an unindent must land on some enclosing indentation level
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# the `\\\n` alternative allows a // comment to continue across an
# escaped (backslash-terminated) line ending
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# pre-PEP8 compatible names
# (retained for backward compatibility with pyparsing 2.x code;
# new code should use the snake_case names defined above)
delimitedList = delimited_list
countedArray = counted_array
matchPreviousLiteral = match_previous_literal
matchPreviousExpr = match_previous_expr
oneOf = one_of
dictOf = dict_of
originalTextFor = original_text_for
nestedExpr = nested_expr
makeHTMLTags = make_html_tags
makeXMLTags = make_xml_tags
anyOpenTag, anyCloseTag = any_open_tag, any_close_tag
commonHTMLEntity = common_html_entity
replaceHTMLEntity = replace_html_entity
opAssoc = OpAssoc
infixNotation = infix_notation
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment