Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hyperlink/

1# -*- coding: utf-8 -*-

2u"""Hyperlink provides Pythonic URL parsing, construction, and rendering.

4Usage is straightforward::

6 >>> import hyperlink

7 >>> url = hyperlink.parse(u'http://github.com/mahmoud/hyperlink?utm_source=docs')

8 >>> url.host

9 u'github.com'

10 >>> secure_url = url.replace(scheme=u'https')

11 >>> secure_url.get('utm_source')[0]

12 u'docs'

14Hyperlink's API centers on the :class:`DecodedURL` type, which wraps

15the lower-level :class:`URL`, both of which can be returned by the

16:func:`parse()` convenience function.

18""" # noqa: E501

20import re

21import sys

22import string

23import socket

24from socket import AF_INET, AF_INET6

26try:

27 from socket import AddressFamily

28except ImportError:

29 AddressFamily = int # type: ignore[assignment,misc]

30from typing import (

31 Any,

32 Callable,

33 Dict,

34 Iterable,

35 Iterator,

36 List,

37 Mapping,

38 Optional,

39 Sequence,

40 Text,

41 Tuple,

42 Type,

43 TypeVar,

44 Union,

45 cast,

46 TYPE_CHECKING,

47 overload,

48)

49from unicodedata import normalize

50from ._socket import inet_pton

52try:

53 from collections.abc import Mapping as MappingABC

54except ImportError: # Python 2

55 from collections import Mapping as MappingABC

57from idna import encode as idna_encode, decode as idna_decode

# True only when running under a Python 2 interpreter.
PY2 = sys.version_info[0] == 2

# Python 3 dropped ``unichr``; alias it to ``chr`` so the rest of the module
# can use one name on both major versions.
try:
    unichr
except NameError:  # Py3
    unichr = chr  # type: Callable[[int], Text]

NoneType = type(None)  # type: Type[None]

# Internal (normalized) representation of query parameters: a tuple of
# (name, value) pairs whose value may be None.
QueryPairs = Tuple[Tuple[Text, Optional[Text]], ...]
# Every shape accepted wherever query parameters are passed in.
QueryParameters = Union[
    Mapping[Text, Optional[Text]],
    QueryPairs,
    Iterable[Tuple[Text, Optional[Text]]],
]
T = TypeVar("T")

# Literal is not available in all pythons so we only bring it in for mypy.
if TYPE_CHECKING:
    from typing import Literal

# from boltons.typeutils
def make_sentinel(name="_MISSING", var_name=""):
    # type: (str, str) -> object
    """Build a brand-new, always-falsy marker object (a "sentinel").

    Sentinels are handy as default values for optional arguments when
    ``None`` is itself a meaningful input, and as placeholders in queues
    or linked lists.

    Args:
        name: Name of the sentinel, shown in its ``repr`` when no
            *var_name* is given.
        var_name: Name of the module-level variable the sentinel is
            assigned to. When set, it becomes the ``repr`` and makes the
            sentinel picklable by that name.

    >>> make_sentinel(var_name='_MISSING')
    _MISSING

    .. note::

        Every call defines a new class and returns a new instance, so
        two sentinels created with identical arguments are neither equal
        nor of the same type.

        >>> make_sentinel('TEST') == make_sentinel('TEST')
        False
        >>> type(make_sentinel('TEST')) == type(make_sentinel('TEST'))
        False
    """

    class Sentinel(object):
        def __init__(self):
            # type: () -> None
            self.name = name
            self.var_name = var_name

        def __repr__(self):
            # type: () -> str
            return self.var_name or "%s(%r)" % (
                self.__class__.__name__,
                self.name,
            )

        if var_name:
            # Returning a plain string from __reduce__ makes pickle store
            # the object as a reference to a global of that name.
            def __reduce__(self):
                # type: () -> str
                return self.var_name

        def __bool__(self):
            # type: () -> bool
            return False

        # Python 2 spelling of the truthiness hook.
        __nonzero__ = __bool__

    return Sentinel()

136

137

# Module-wide "argument not passed" marker; ``_unspecified`` is a second
# name bound to the very same object.
_UNSET = make_sentinel("_UNSET")  # type: Any
_unspecified = _UNSET  # type: Any

139

140

# RFC 3986 Section 2.3, Unreserved URI Characters
# https://tools.ietf.org/html/rfc3986#section-2.3
# (ASCII letters, decimal digits, and the four marks "~", "-", ".", "_")
_UNRESERVED_CHARS = frozenset(
    "~-._" + string.digits + string.ascii_uppercase + string.ascii_lowercase
)

146

147

# URL parsing regex (based on RFC 3986 Appendix B, with modifications).
# Splits a URL into optional scheme, optional "//" + authority, path,
# optional query, and optional fragment.
_URL_RE = re.compile(
    r"^((?P<scheme>[^:/?#]+):)?"
    r"((?P<_netloc_sep>//)"
    r"(?P<authority>[^/?#]*))?"
    r"(?P<path>[^?#]*)"
    r"(\?(?P<query>[^#]*))?"
    r"(#(?P<fragment>.*))?$"
)
# Characters allowed in a scheme: alphanumerics, "+", "-", and ".".
# The "-" is placed last in the class so it is a literal hyphen; written
# as "+-." it formed a range from "+" to "." that also admitted ",".
_SCHEME_RE = re.compile(r"^[a-zA-Z0-9+.-]*$")
# Splits an authority into optional userinfo, a host (bracketed IPv6,
# plain, or a catch-all "bad" host), and optional port text.
_AUTHORITY_RE = re.compile(
    r"^(?:(?P<userinfo>[^@/?#]*)@)?"
    r"(?P<host>"
    r"(?:\[(?P<ipv6_host>[^[\]/?#]*)\])"
    r"|(?P<plain_host>[^:/?#[\]]*)"
    r"|(?P<bad_host>.*?))?"
    r"(?::(?P<port>.*))?$"
)

166

167

# Maps every two-character hex pair (as ASCII bytes, in every mix of upper
# and lower case that string.hexdigits allows) to the single byte it names.
_HEX_CHAR_MAP = {
    (hi + lo).encode("ascii"): unichr(int(hi + lo, 16)).encode("charmap")
    for hi in string.hexdigits
    for lo in string.hexdigits
}
# Matches a run of one or more ASCII characters.
_ASCII_RE = re.compile("([\x00-\x7f]+)")

176

# RFC 3986 section 2.2, Reserved Characters
# https://tools.ietf.org/html/rfc3986#section-2.2
_GEN_DELIMS = frozenset(u":/?#[]@")
_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
_ALL_DELIMS = _GEN_DELIMS.union(_SUB_DELIMS)

# For each URL component: the characters that may appear unencoded
# (*_SAFE) and the reserved delimiters that may not (*_DELIMS).
_USERINFO_SAFE = _UNRESERVED_CHARS.union(_SUB_DELIMS, u"%")
_USERINFO_DELIMS = _ALL_DELIMS.difference(_USERINFO_SAFE)
_PATH_SAFE = _USERINFO_SAFE.union(u":@")
_PATH_DELIMS = _ALL_DELIMS.difference(_PATH_SAFE)
# Same as a path segment, except that ":" is not safe.
_SCHEMELESS_PATH_SAFE = _PATH_SAFE.difference(":")
_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS.difference(_SCHEMELESS_PATH_SAFE)
_FRAGMENT_SAFE = _UNRESERVED_CHARS.union(_PATH_SAFE, u"/?")
_FRAGMENT_DELIMS = _ALL_DELIMS.difference(_FRAGMENT_SAFE)
# A query value must not contain a raw "&"; a key additionally no raw "=".
_QUERY_VALUE_SAFE = _UNRESERVED_CHARS.union(_FRAGMENT_SAFE.difference(u"&"))
_QUERY_VALUE_DELIMS = _ALL_DELIMS.difference(_QUERY_VALUE_SAFE)
_QUERY_KEY_SAFE = _UNRESERVED_CHARS.union(_QUERY_VALUE_SAFE.difference(u"="))
_QUERY_KEY_DELIMS = _ALL_DELIMS.difference(_QUERY_KEY_SAFE)

195

196

def _make_decode_map(delims, allow_percent=False):
    # type: (Iterable[Text], bool) -> Mapping[bytes, bytes]
    """Return a copy of ``_HEX_CHAR_MAP`` without the entries for *delims*.

    The hex pairs naming each delimiter (upper- and lowercase spelling)
    are removed, so percent-decoding with the returned map leaves those
    characters encoded.

    Args:
        delims: Characters whose percent-encoded forms must stay encoded.
        allow_percent: Unless true, ``%`` is added to *delims*.

    Returns:
        A new mapping from hex-pair bytes to the decoded byte.
    """
    decode_map = dict(_HEX_CHAR_MAP)
    excluded = delims if allow_percent else set(delims) | set([u"%"])
    for char in excluded:
        upper_hex = "{0:02X}".format(ord(char)).encode("ascii")
        lower_hex = upper_hex.lower()
        decode_map.pop(upper_hex)
        # Digit-only pairs (e.g. "25") have a single spelling.
        if lower_hex != upper_hex:
            decode_map.pop(lower_hex)
    return decode_map

209

210

def _make_quote_map(safe_chars):
    # type: (Iterable[Text]) -> Mapping[Union[int, Text], Text]
    """Build a percent-encoding lookup table for code points 0-255.

    Each value is keyed both by its one-character string and by its
    integer ordinal (iterating a Python 3 bytestring yields ints). Members
    of *safe_chars* map to themselves; all others map to ``%XX`` with
    uppercase hex digits.
    """
    quote_map = {}  # type: Dict[Union[int, Text], Text]
    for code in range(256):
        char = chr(code)
        if char in safe_chars:
            quoted = char
        else:
            quoted = "%{0:02X}".format(code)
        quote_map[char] = quoted
        quote_map[code] = quoted
    return quote_map

223

224

# Per-component lookup tables, built once at import time.
_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
_USERINFO_DECODE_MAP = _make_decode_map(_USERINFO_DELIMS)
_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
_SCHEMELESS_PATH_PART_QUOTE_MAP = _make_quote_map(_SCHEMELESS_PATH_SAFE)
_PATH_DECODE_MAP = _make_decode_map(_PATH_DELIMS)
_QUERY_KEY_QUOTE_MAP = _make_quote_map(_QUERY_KEY_SAFE)
_QUERY_KEY_DECODE_MAP = _make_decode_map(_QUERY_KEY_DELIMS)
_QUERY_VALUE_QUOTE_MAP = _make_quote_map(_QUERY_VALUE_SAFE)
# "+" is additionally kept encoded when decoding query values.
_QUERY_VALUE_DECODE_MAP = _make_decode_map(_QUERY_VALUE_DELIMS.union("+"))
_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)
_FRAGMENT_DECODE_MAP = _make_decode_map(_FRAGMENT_DELIMS)
_UNRESERVED_QUOTE_MAP = _make_quote_map(_UNRESERVED_CHARS)
# Only the hex pairs that name an unreserved character.
_UNRESERVED_DECODE_MAP = {
    hexpair: byte
    for hexpair, byte in _HEX_CHAR_MAP.items()
    if byte.decode("ascii", "replace") in _UNRESERVED_CHARS
}

# The two path tuples treated as "root": no segments, or one empty segment.
_ROOT_PATHS = frozenset([(), (u"",)])

246

247

def _encode_reserved(text, maximal=True):
    # type: (Text, bool) -> Text
    """A very comprehensive percent encoding for encoding all
    delimiters. Used for arguments to DecodedURL, where a % means a
    percent sign, and not the character used by URLs for escaping
    bytes.

    With *maximal* true, *text* is NFC-normalized, UTF-8 encoded, and
    every byte is looked up in ``_UNRESERVED_QUOTE_MAP``.
    """
    if not maximal:
        # NOTE(review): only characters already in _UNRESERVED_CHARS are
        # looked up here, and the unreserved quote map maps those to
        # themselves, so this branch appears to return *text* unchanged.
        # Confirm whether encoding the delimiters was intended.
        return u"".join(
            [
                _UNRESERVED_QUOTE_MAP[ch] if ch in _UNRESERVED_CHARS else ch
                for ch in text
            ]
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join([_UNRESERVED_QUOTE_MAP[byte] for byte in utf8_bytes])

264

265

def _encode_path_part(text, maximal=True):
    # type: (Text, bool) -> Text
    """Percent-encode a single segment of a URL path.

    With *maximal* true, *text* is NFC-normalized, UTF-8 encoded, and
    every byte is looked up in the path quote map. Otherwise only
    characters in ``_PATH_DELIMS`` are replaced; everything else passes
    through untouched.
    """
    if not maximal:
        return u"".join(
            [
                _PATH_PART_QUOTE_MAP[ch] if ch in _PATH_DELIMS else ch
                for ch in text
            ]
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join([_PATH_PART_QUOTE_MAP[byte] for byte in utf8_bytes])

275

276

def _encode_schemeless_path_part(text, maximal=True):
    # type: (Text, bool) -> Text
    """Percent-encode the first segment of a URL path for a URL without a
    scheme specified.

    Works like the regular path-segment encoder but uses the schemeless
    tables, in which ":" is not a safe character.
    """
    if not maximal:
        return u"".join(
            [
                _SCHEMELESS_PATH_PART_QUOTE_MAP[ch]
                if ch in _SCHEMELESS_PATH_DELIMS
                else ch
                for ch in text
            ]
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join(
        [_SCHEMELESS_PATH_PART_QUOTE_MAP[byte] for byte in utf8_bytes]
    )

293

294

def _encode_path_parts(
    text_parts,  # type: Sequence[Text]
    rooted=False,  # type: bool
    has_scheme=True,  # type: bool
    has_authority=True,  # type: bool
    maximal=True,  # type: bool
):
    # type: (...) -> Sequence[Text]
    """
    Percent-encode a tuple of path parts into a complete path.

    Setting *maximal* to False percent-encodes only the reserved
    characters that are syntactically necessary for serialization,
    preserving any IRI-style textual data.

    Leaving *maximal* set to its default True percent-encodes
    everything required to convert a portion of an IRI to a portion of
    a URI.

    RFC 3986 3.3:

       If a URI contains an authority component, then the path component
       must either be empty or begin with a slash ("/") character.  If a URI
       does not contain an authority component, then the path cannot begin
       with two slash characters ("//").  In addition, a URI reference
       (Section 4.1) may be a relative-path reference, in which case the
       first path segment cannot contain a colon (":") character.

    Args:
        text_parts: The path segments to encode.
        rooted: When true, an empty segment is prepended to the parts.
        has_scheme: When false, the first segment is encoded with
            :func:`_encode_schemeless_path_part`, which also encodes ":".
        has_authority: Accepted for interface compatibility; not
            consulted by the current implementation.
        maximal: See above. Applied to every segment, including the
            first segment of a schemeless path.

    Returns:
        A tuple of encoded segments, or ``()`` when *text_parts* is empty.
    """
    if not text_parts:
        return ()
    if rooted:
        text_parts = (u"",) + tuple(text_parts)
    # TODO: has_authority with a non-rooted path contradicts the RFC text
    # quoted above, but it is too late to fail at this point.
    encoded_parts = []  # type: List[Text]
    if has_scheme:
        encoded_parts = [
            _encode_path_part(part, maximal=maximal) if part else part
            for part in text_parts
        ]
    else:
        # Forward *maximal* here too, so that a non-maximal encoding of a
        # schemeless path does not force the first segment to be fully
        # percent-encoded.
        encoded_parts = [
            _encode_schemeless_path_part(text_parts[0], maximal=maximal)
        ]
        encoded_parts.extend(
            [
                _encode_path_part(part, maximal=maximal) if part else part
                for part in text_parts[1:]
            ]
        )
    return tuple(encoded_parts)

344

345

def _encode_query_key(text, maximal=True):
    # type: (Text, bool) -> Text
    """
    Percent-encode a single query string key.

    With *maximal* true, *text* is NFC-normalized, UTF-8 encoded, and
    every byte is looked up in the query-key quote map. Otherwise only
    characters in ``_QUERY_KEY_DELIMS`` are replaced.
    """
    if not maximal:
        return u"".join(
            [
                _QUERY_KEY_QUOTE_MAP[ch] if ch in _QUERY_KEY_DELIMS else ch
                for ch in text
            ]
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join([_QUERY_KEY_QUOTE_MAP[byte] for byte in utf8_bytes])

357

358

def _encode_query_value(text, maximal=True):
    # type: (Text, bool) -> Text
    """
    Percent-encode a single query string value.

    With *maximal* true, *text* is NFC-normalized, UTF-8 encoded, and
    every byte is looked up in the query-value quote map. Otherwise only
    characters in ``_QUERY_VALUE_DELIMS`` are replaced.
    """
    if not maximal:
        return u"".join(
            [
                _QUERY_VALUE_QUOTE_MAP[ch] if ch in _QUERY_VALUE_DELIMS else ch
                for ch in text
            ]
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join([_QUERY_VALUE_QUOTE_MAP[byte] for byte in utf8_bytes])

373

374

def _encode_fragment_part(text, maximal=True):
    # type: (Text, bool) -> Text
    """Quote the fragment part of the URL. Fragments don't have
    subdelimiters, so the whole URL fragment can be passed.

    With *maximal* true, *text* is NFC-normalized, UTF-8 encoded, and
    every byte is looked up in the fragment quote map. Otherwise only
    characters in ``_FRAGMENT_DELIMS`` are replaced.
    """
    if not maximal:
        return u"".join(
            [
                _FRAGMENT_QUOTE_MAP[ch] if ch in _FRAGMENT_DELIMS else ch
                for ch in text
            ]
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join([_FRAGMENT_QUOTE_MAP[byte] for byte in utf8_bytes])

386

387

def _encode_userinfo_part(text, maximal=True):
    # type: (Text, bool) -> Text
    """Quote special characters in either the username or password
    section of the URL.

    With *maximal* true, *text* is NFC-normalized, UTF-8 encoded, and
    every byte is looked up in the userinfo quote map. Otherwise only
    characters in ``_USERINFO_DELIMS`` are replaced.
    """
    if not maximal:
        return u"".join(
            [
                _USERINFO_PART_QUOTE_MAP[ch] if ch in _USERINFO_DELIMS else ch
                for ch in text
            ]
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join([_USERINFO_PART_QUOTE_MAP[byte] for byte in utf8_bytes])

402

403

# This port list painstakingly curated by hand searching through
# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
# and
# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
#
# Maps each netloc-using scheme to its default port (None when the scheme
# has no default port).
SCHEME_PORT_MAP = {
    "acap": 674,
    "afp": 548,
    "dict": 2628,
    "dns": 53,
    "file": None,
    "ftp": 21,
    "git": 9418,
    "gopher": 70,
    "http": 80,
    "https": 443,
    "imap": 143,
    "ipp": 631,
    "ipps": 631,
    "irc": 194,
    "ircs": 6697,
    "ldap": 389,
    "ldaps": 636,
    "mms": 1755,
    "msrp": 2855,
    "msrps": None,
    "mtqp": 1038,
    "nfs": 111,
    "nntp": 119,
    "nntps": 563,
    "pop": 110,
    "prospero": 1525,
    "redis": 6379,
    "rsync": 873,
    "rtsp": 554,
    "rtsps": 322,
    "rtspu": 5005,
    "sftp": 22,
    "smb": 445,
    "snmp": 161,
    "ssh": 22,
    "steam": None,
    "svn": 3690,
    "telnet": 23,
    "ventrilo": 3784,
    "vnc": 5900,
    "wais": 210,
    "ws": 80,
    "wss": 443,
    "xmpp": None,
}

# This list of schemes that don't use authorities is also from the link above.
NO_NETLOC_SCHEMES = {
    "urn",
    "about",
    "bitcoin",
    "blob",
    "data",
    "geo",
    "magnet",
    "mailto",
    "news",
    "pkcs11",
    "sip",
    "sips",
    "tel",
}
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc

# Schemes registered with query_plus_is_space=False (see register_scheme).
NO_QUERY_PLUS_SCHEMES = set()

476

477

def register_scheme(
    text, uses_netloc=True, default_port=None, query_plus_is_space=True
):
    # type: (Text, bool, Optional[int], bool) -> None
    """Registers new scheme information, resulting in correct port and
    slash behavior from the URL object. There are dozens of standard
    schemes preregistered, so this function is mostly meant for
    proprietary internal customizations or stopgaps on missing
    standards information. If a scheme seems to be missing, please
    `file an issue`_!

    Registering a scheme again replaces its earlier netloc registration:
    the scheme ends up in exactly one of ``SCHEME_PORT_MAP`` and
    ``NO_NETLOC_SCHEMES``.

    Args:
        text: A string representation of the scheme.
            (the 'http' in 'http://hatnote.com')
        uses_netloc: Does the scheme support specifying a
            network host? For instance, "http" does, "mailto" does
            not. Defaults to True.
        default_port: The default port, if any, for
            netloc-using schemes.
        query_plus_is_space: If true, a "+" in the query string should be
            decoded as a space by DecodedURL.

    Raises:
        ValueError: If *default_port* cannot be converted to an integer,
            if a port is given for a non-netloc scheme, or if
            *uses_netloc* is not exactly ``True`` or ``False``.

    .. _file an issue: https://github.com/mahmoud/hyperlink/issues
    """
    text = text.lower()
    if default_port is not None:
        try:
            default_port = int(default_port)
        except (ValueError, TypeError):
            raise ValueError(
                "default_port expected integer or None, not %r"
                % (default_port,)
            )

    if uses_netloc is True:
        SCHEME_PORT_MAP[text] = default_port
        # Drop any earlier non-netloc registration of the same scheme.
        NO_NETLOC_SCHEMES.discard(text)
    elif uses_netloc is False:
        if default_port is not None:
            raise ValueError(
                "unexpected default port while specifying"
                " non-netloc scheme: %r" % default_port
            )
        NO_NETLOC_SCHEMES.add(text)
        # scheme_uses_netloc() consults SCHEME_PORT_MAP first, so a stale
        # entry there would silently override this registration.
        SCHEME_PORT_MAP.pop(text, None)
    else:
        raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)

    if not query_plus_is_space:
        NO_QUERY_PLUS_SCHEMES.add(text)

    return

528

529

def scheme_uses_netloc(scheme, default=None):
    # type: (Text, Optional[bool]) -> Optional[bool]
    """Report whether *scheme* is separated from the rest of the URL by
    :code:`://` (True) or by a bare :code:`:` (False).

    This cannot be inferred from the other parts of a URL; it is a
    property of the scheme's own definition. The answer is looked up in
    the registries of known schemes, the same approach taken by
    :mod:`urlparse`.

    Lookup order, after lowercasing *scheme*:

    1. An empty scheme yields False.
    2. A scheme in ``SCHEME_PORT_MAP`` yields True.
    3. A scheme in ``NO_NETLOC_SCHEMES`` yields False.
    4. If the part after the last ``+`` is in ``SCHEME_PORT_MAP`` the
       result is True, which gives intuitive behavior for schemes like
       ``git+ssh``.
    5. Otherwise *default* is returned, letting the caller keep whatever
       separator it saw.
    """
    if not scheme:
        return False
    lowered = scheme.lower()
    if lowered in SCHEME_PORT_MAP:
        return True
    if lowered in NO_NETLOC_SCHEMES:
        return False
    base_scheme = lowered.rsplit("+", 1)[-1]
    return True if base_scheme in SCHEME_PORT_MAP else default

560

561

class URLParseError(ValueError):
    """Raised when a URL cannot be parsed.

    Subclasses :exc:`ValueError`. Mostly raised on invalid ports and
    IPv6 addresses.
    """

568

569

def _optional(argument, default):
    # type: (Any, Any) -> Any
    """Return *default* when *argument* is the ``_UNSET`` sentinel,
    otherwise return *argument* itself.
    """
    return default if argument is _UNSET else argument

576

577

def _typecheck(name, value, *types):
    # type: (Text, T, Type[Any]) -> T
    """
    Return *value* unchanged if it is an instance of any of *types*.

    Raises:
        ValueError: If no types were supplied.
        TypeError: If *value* matches none of *types*; the message names
            the expected types, *name*, and the offending value.
    """
    if not types:
        raise ValueError("expected one or more types, maybe use _textcheck?")
    if isinstance(value, types):
        return value
    expected = " or ".join([t.__name__ for t in types])
    raise TypeError("expected %s for %s, got %r" % (expected, name, value))

592

593

def _textcheck(name, value, delims=frozenset(), nullable=False):
    # type: (Text, T, Iterable[Text], bool) -> T
    """Validate that *value* is text free of the given delimiters.

    Args:
        name: Label for the value, used in error messages.
        value: The object to check.
        delims: Characters that must not occur in *value*.
        nullable: When true, ``None`` is accepted and returned as-is
            (used by query string values).

    Returns:
        *value*, unchanged.

    Raises:
        TypeError: If *value* is not text (and not an allowed ``None``).
        ValueError: If *value* contains any character from *delims*.
    """
    if isinstance(value, Text):
        # TODO: test caching into regexes
        if delims and set(value) & set(delims):
            raise ValueError(
                "one or more reserved delimiters %s present in %s: %r"
                % ("".join(delims), name, value)
            )
        return value  # type: ignore[return-value] # T vs. Text

    if nullable and value is None:
        return value  # type: ignore[unreachable]

    str_name = "unicode" if PY2 else "str"
    exp = str_name + " or NoneType" if nullable else str_name
    raise TypeError("expected %s for %s, got %r" % (exp, name, value))

610

611

def iter_pairs(iterable):
    # type: (Iterable[Any]) -> Iterator[Any]
    """
    Return an iterator over the (key, value) pairs in ``iterable``.

    Mappings are iterated through their ``items()``; anything else is
    assumed to already yield (key, value) pairs. This mirrors what
    Python's ``dict()`` constructor accepts.
    """
    if isinstance(iterable, MappingABC):
        pairs = iterable.items()
    else:
        pairs = iterable
    return iter(pairs)

624

625

def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False):
    # type: (Text, bool, bool) -> Text
    """Percent-decode *text* using ``_UNRESERVED_DECODE_MAP``.

    Both flags are forwarded unchanged to :func:`_percent_decode`.
    """
    options = dict(
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
        _decode_map=_UNRESERVED_DECODE_MAP,
    )
    return _percent_decode(text, **options)

634

635

def _decode_userinfo_part(
    text, normalize_case=False, encode_stray_percents=False
):
    # type: (Text, bool, bool) -> Text
    """Percent-decode *text* using ``_USERINFO_DECODE_MAP``.

    Both flags are forwarded unchanged to :func:`_percent_decode`.
    """
    options = dict(
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
        _decode_map=_USERINFO_DECODE_MAP,
    )
    return _percent_decode(text, **options)

646

647

def _decode_path_part(text, normalize_case=False, encode_stray_percents=False):
    # type: (Text, bool, bool) -> Text
    """Percent-decode *text* using ``_PATH_DECODE_MAP``.

    Both flags are forwarded unchanged to :func:`_percent_decode`.

    >>> _decode_path_part(u'%61%77%2f%7a')
    u'aw%2fz'
    >>> _decode_path_part(u'%61%77%2f%7a', normalize_case=True)
    u'aw%2Fz'
    """
    options = dict(
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
        _decode_map=_PATH_DECODE_MAP,
    )
    return _percent_decode(text, **options)

662

663

def _decode_query_key(text, normalize_case=False, encode_stray_percents=False):
    # type: (Text, bool, bool) -> Text
    """Percent-decode *text* using ``_QUERY_KEY_DECODE_MAP``.

    Both flags are forwarded unchanged to :func:`_percent_decode`.
    """
    options = dict(
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
        _decode_map=_QUERY_KEY_DECODE_MAP,
    )
    return _percent_decode(text, **options)

672

673

def _decode_query_value(
    text, normalize_case=False, encode_stray_percents=False
):
    # type: (Text, bool, bool) -> Text
    """Percent-decode *text* using ``_QUERY_VALUE_DECODE_MAP``.

    Both flags are forwarded unchanged to :func:`_percent_decode`.
    """
    options = dict(
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
        _decode_map=_QUERY_VALUE_DECODE_MAP,
    )
    return _percent_decode(text, **options)

684

685

def _decode_fragment_part(
    text, normalize_case=False, encode_stray_percents=False
):
    # type: (Text, bool, bool) -> Text
    """Percent-decode *text* using ``_FRAGMENT_DECODE_MAP``.

    Both flags are forwarded unchanged to :func:`_percent_decode`.
    """
    options = dict(
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
        _decode_map=_FRAGMENT_DECODE_MAP,
    )
    return _percent_decode(text, **options)

696

697

def _percent_decode(
    text,  # type: Text
    normalize_case=False,  # type: bool
    subencoding="utf-8",  # type: Text
    raise_subencoding_exc=False,  # type: bool
    encode_stray_percents=False,  # type: bool
    _decode_map=_HEX_CHAR_MAP,  # type: Mapping[bytes, bytes]
):
    # type: (...) -> Text
    """Convert percent-encoded text characters to their normal,
    human-readable equivalents.

    All characters in the input text must be encodable by
    *subencoding*. All special characters underlying the values in the
    percent-encoding must be decodable as *subencoding*. If a
    non-*subencoding*-valid string is passed, the original text is
    returned with no changes applied.

    Only called by field-tailored variants, e.g.,
    :func:`_decode_path_part`, as every percent-encodable part of the
    URL has characters which should not be percent decoded.

    >>> _percent_decode(u'abc%20def')
    u'abc def'

    Args:
        text: Text with percent-encoding present.
        normalize_case: Whether undecoded percent segments, such as encoded
            delimiters, should be uppercased, per RFC 3986 Section 2.1.
            See :func:`_decode_path_part` for an example.
        subencoding: The name of the encoding underlying the percent-encoding.
        raise_subencoding_exc: Whether an error in decoding the bytes
            underlying the percent-decoding should be raised.
        encode_stray_percents: Whether a ``%`` that is not followed by a
            valid hex pair should be emitted as ``%25`` instead of being
            left as a bare ``%``.
        _decode_map: Mapping from two-byte hex pairs to the byte each one
            decodes to; pairs missing from the map are left encoded.

    Returns:
        Text: The percent-decoded version of *text*, decoded by *subencoding*.
    """
    try:
        quoted_bytes = text.encode(subencoding)
    except UnicodeEncodeError:
        return text

    chunks = quoted_bytes.split(b"%")
    if len(chunks) == 1:
        # No percent sign at all: nothing to decode.
        return text

    pieces = [chunks[0]]
    for chunk in chunks[1:]:
        hexpair, rest = chunk[:2], chunk[2:]
        if hexpair in _decode_map:
            pieces.append(_decode_map[hexpair])
            pieces.append(rest)
            continue

        # Not decodable in this context: either a valid hex pair that
        # must stay encoded (e.g. a delimiter), or a stray percent sign.
        pair_is_hex = hexpair in _HEX_CHAR_MAP
        if encode_stray_percents and not pair_is_hex:
            # Treat as a real percent sign, which is reserved and should
            # stay in an encoded state.
            pieces.append(b"%25")
        else:
            pieces.append(b"%")

        if normalize_case and pair_is_hex:
            pieces.append(hexpair.upper())
            pieces.append(rest)
        else:
            pieces.append(chunk)

    unquoted_bytes = b"".join(pieces)

    try:
        return unquoted_bytes.decode(subencoding)
    except UnicodeDecodeError:
        if raise_subencoding_exc:
            raise
        return text

776

777

def _decode_host(host):
    # type: (Text) -> Text
    """Decode a host from ASCII-encodable text to IDNA-decoded text. If
    the host text is not ASCII, it is returned unchanged, as it is
    presumed that it is already IDNA-decoded.

    Some technical details: _decode_host is built on top of the "idna"
    package, which has some quirks. Capital letters are not valid
    IDNA2008, so idna raises ``InvalidCodepoint`` for a non-ASCII label
    containing them, yet a label that is already ASCII skips those checks
    and is simply lowercased on decode. Passing ``uts46=True`` bypasses
    this check and some other functionality, giving a more permissive
    and convenient interface.

    Example output (from idna==2.6)::

        >> idna.encode(u'mahmöud.io')
        'xn--mahmud-zxa.io'
        >> idna.encode(u'Mahmöud.io')
        Traceback (most recent call last):
          ...
        idna.core.InvalidCodepoint: Codepoint U+004D at position 1 ...
        >> idna.encode(u'Mahmoud.io')
        'Mahmoud.io'

        # Similar behavior for decodes below
        >> idna.decode(u'Mahmoud.io')
        u'mahmoud.io'
        >> idna.decode(u'Méhmoud.io', uts46=True)
        u'm\xe9hmoud.io'
    """
    if not host:
        return u""

    try:
        host_bytes = host.encode("ascii")
    except UnicodeEncodeError:
        # Non-ASCII input is presumed to be decoded already.
        return host

    try:
        return idna_decode(host_bytes, uts46=True)
    except ValueError:
        # only reached on "narrow" (UCS-2) Python builds <3.4, see #7
        # NOTE: not going to raise here, because there's no
        # ambiguity in the IDNA, and the host is still
        # technically usable
        return host

840

841

def _resolve_dot_segments(path):
    # type: (Sequence[Text]) -> Sequence[Text]
    """Normalize the URL path by resolving segments of '.' and '..'. For
    more details, see `RFC 3986 section 5.2.4, Remove Dot Segments`_.

    A '.' segment is dropped; a '..' segment drops the segment before it
    (and is ignored when there is none). If the path ends in '.' or '..',
    an empty segment is appended to the result.

    Args:
        path: sequence of path segments in text form

    Returns:
        A new list of path segments with the '.' and '..' elements removed
        and resolved.

    .. _RFC 3986 section 5.2.4, Remove Dot Segments: https://tools.ietf.org/html/rfc3986#section-5.2.4
    """  # noqa: E501
    resolved = []  # type: List[Text]

    for segment in path:
        if segment == u"..":
            if resolved:
                resolved.pop()
            continue
        if segment != u".":
            resolved.append(segment)

    last = list(path[-1:])
    if last == [u"."] or last == [u".."]:
        resolved.append(u"")

    return resolved

871

872

def parse_host(host):
    # type: (Text) -> Tuple[Optional[AddressFamily], Text]
    """Parse the host into a tuple of ``(family, host)``, where family
    is the appropriate :mod:`socket` module constant when the host is
    an IP address. Family is ``None`` when the host is not an IP.

    A host containing ``:`` is validated as an IPv6 address; any other
    host is tried as an IPv4 address and otherwise treated as a name.

    Raises:
        URLParseError: If *host* contains a ``:`` but is not a valid
            IPv6 address.

    Returns:
        family (socket constant or None), host (string)

    >>> import socket
    >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
    True
    >>> parse_host('::1') == (socket.AF_INET6, '::1')
    True
    >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
    True
    """
    if not host:
        return None, u""

    # Start from "not an IP" so that every path below leaves ``family``
    # bound, including the one where the host text cannot be encoded.
    family = None  # type: Optional[AddressFamily]

    if u":" in host:
        try:
            inet_pton(AF_INET6, host)
        except socket.error as se:
            raise URLParseError("invalid IPv6 host: %r (%r)" % (host, se))
        except UnicodeEncodeError:
            # TODO: this can't be a real host right?
            # Text that cannot be encoded cannot be an IPv6 literal, so
            # it is reported as a non-IP host rather than crashing.
            pass
        else:
            family = AF_INET6
    else:
        try:
            inet_pton(AF_INET, host)
        except (socket.error, UnicodeEncodeError):
            pass  # not an IP
        else:
            family = AF_INET

    return family, host

913

914

915class URL(object):

916 r"""From blogs to billboards, URLs are so common, that it's easy to

917 overlook their complexity and power. With hyperlink's

918 :class:`URL` type, working with URLs doesn't have to be hard.

919

920 URLs are made of many parts. Most of these parts are officially

921 named in `RFC 3986`_ and this diagram may prove handy in identifying

922 them::

923

924 foo://user:pass@example.com:8042/over/there?name=ferret#nose

925 \_/ \_______/ \_________/ \__/\_________/ \_________/ \__/

926 | | | | | | |

927 scheme userinfo host port path query fragment

928

929 While :meth:`~URL.from_text` is used for parsing whole URLs, the

930 :class:`URL` constructor builds a URL from the individual

931 components, like so::

932

933 >>> from hyperlink import URL

934 >>> url = URL(scheme=u'https', host=u'example.com', path=[u'hello', u'world'])

935 >>> print(url.to_text())

936 https://example.com/hello/world

937

938 The constructor runs basic type checks. All strings are expected

939 to be text (:class:`str` in Python 3, :class:`unicode` in Python 2). All

940 arguments are optional, defaulting to appropriately empty values. A full

941 list of constructor arguments is below.

942

943 Args:

944 scheme: The text name of the scheme.

945 host: The host portion of the network location

946 port: The port part of the network location. If ``None`` or no port is

947 passed, the port will default to the default port of the scheme, if

948 it is known. See the ``SCHEME_PORT_MAP`` and

949 :func:`register_default_port` for more info.

950 path: A tuple of strings representing the slash-separated parts of the

951 path, each percent-encoded.

952 query: The query parameters, as a dictionary or as a sequence of

953 percent-encoded key-value pairs.

954 fragment: The fragment part of the URL.

955 rooted: A rooted URL is one which indicates an absolute path.

956 This is True on any URL that includes a host, or any relative URL

957 that starts with a slash.

958 userinfo: The username or colon-separated username:password pair.

959 uses_netloc: Indicates whether ``://`` (the "netloc separator") will

960 appear to separate the scheme from the *path* in cases where no

961 host is present.

962 Setting this to ``True`` is a non-spec-compliant affordance for the

963 common practice of having URIs that are *not* URLs (cannot have a

964 'host' part) but nevertheless use the common ``://`` idiom that

965 most people associate with URLs; e.g. ``message:`` URIs like

966 ``message://message-id`` being equivalent to ``message:message-id``.

967 This may be inferred based on the scheme depending on whether

968 :func:`register_scheme` has been used to register the scheme and

969 should not be passed directly unless you know the scheme works like

970 this and you know it has not been registered.

971

972 All of these parts are also exposed as read-only attributes of :class:`URL`

973 instances, along with several useful methods.

974

975 .. _RFC 3986: https://tools.ietf.org/html/rfc3986

976 .. _RFC 3987: https://tools.ietf.org/html/rfc3987

977 """ # noqa: E501

978

def __init__(
    self,
    scheme=None,  # type: Optional[Text]
    host=None,  # type: Optional[Text]
    path=(),  # type: Iterable[Text]
    query=(),  # type: QueryParameters
    fragment=u"",  # type: Text
    port=None,  # type: Optional[int]
    rooted=None,  # type: Optional[bool]
    userinfo=u"",  # type: Text
    uses_netloc=None,  # type: Optional[bool]
):
    # type: (...) -> None
    """Validate the URL components and store them on the instance.

    Defaults are inferred first (while ``None`` still means "not
    passed"), then every component is type/character checked, and finally
    ``rooted`` and ``uses_netloc`` are reconciled with the presence of an
    authority and with the shape of the path.

    Raises:
        ValueError: If a non-empty *scheme* does not match ``_SCHEME_RE``.
        TypeError: If *path* is a single text value instead of an
            iterable of text segments.
    """
    # Inference that relies on knowing which arguments were omitted.
    if scheme is None and host is not None:
        scheme = u"http"  # TODO: why
    if scheme is not None and port is None:
        port = SCHEME_PORT_MAP.get(scheme)
    if host and query and not path:
        # per RFC 3986 6.2.3, "a URI that uses the generic syntax
        # for authority with an empty path should be normalized to
        # a path of '/'."
        path = (u"",)

    # Detection is over; collapse the "not passed" markers to real values.
    scheme = u"" if scheme is None else scheme
    host = u"" if host is None else host
    rooted = bool(host) if rooted is None else rooted

    # Validate and store, component by component.
    self._scheme = _textcheck("scheme", scheme)
    if self._scheme and not _SCHEME_RE.match(self._scheme):
        raise ValueError(
            'invalid scheme: %r. Only alphanumeric, "+",'
            ' "-", and "." allowed. Did you meant to call'
            " %s.from_text()?" % (self._scheme, self.__class__.__name__)
        )

    _, self._host = parse_host(_textcheck("host", host, "/?#@"))

    if isinstance(path, Text):
        raise TypeError(
            "expected iterable of text for path, not: %r" % (path,)
        )
    self._path = tuple(
        _textcheck("path segment", segment, "/?#") for segment in path
    )

    checked_pairs = []
    for key, val in iter_pairs(query):
        checked_pairs.append(
            (
                _textcheck("query parameter name", key, "&=#"),
                _textcheck("query parameter value", val, "&#", nullable=True),
            )
        )
    self._query = tuple(checked_pairs)

    self._fragment = _textcheck("fragment", fragment)
    self._port = _typecheck("port", port, int, NoneType)
    self._rooted = _typecheck("rooted", rooted, bool)
    self._userinfo = _textcheck("userinfo", userinfo, "/?#@")

    if uses_netloc is None:
        uses_netloc = scheme_uses_netloc(self._scheme, uses_netloc)
    self._uses_netloc = _typecheck(
        "uses_netloc", uses_netloc, bool, NoneType
    )

    # An authority is rendered when there is a host, or a port that is
    # not simply the scheme's default.
    has_authority = self._host or (
        self._port and self._port != SCHEME_PORT_MAP.get(scheme)
    )
    if has_authority:
        # fixup for rooted consistency; if there's any 'authority'
        # represented in the textual URL, then the path must be rooted, and
        # we're definitely using a netloc (there must be a ://).
        self._rooted = True
        self._uses_netloc = True
    if (not self._rooted) and self.path[:1] == (u"",):
        # A leading empty segment is how a caller spells a leading slash;
        # fold it into the ``rooted`` flag.
        self._rooted = True
        self._path = self._path[1:]
    if not has_authority and self._path and not self._rooted:
        # If, after fixing up the path, there *is* a path and it *isn't*
        # rooted, then we are definitely not using a netloc; if we did, it
        # would make the path (erroneously) look like a hostname.
        self._uses_netloc = False

1063

def get_decoded_url(self, lazy=False):
    # type: (bool) -> DecodedURL
    """Return the :class:`DecodedURL` wrapping this URL, creating it once.

    The wrapper is cached on the instance as ``_decoded_url``; *lazy* is
    only forwarded to :class:`DecodedURL` on the call that creates it.
    """
    try:
        decoded = self._decoded_url
    except AttributeError:
        # First request: build the wrapper and remember it.
        decoded = self._decoded_url = DecodedURL(
            self, lazy=lazy
        )  # type: DecodedURL
    return decoded

1071

@property
def scheme(self):
    # type: () -> Text
    """The URL's scheme, as text.

    This is the first part of an absolute URL (everything before the
    first colon) and it defines the semantics of the rest of the URL.
    Examples include "http", "https", "ssh", "file", "mailto", and many
    others. See :func:`~hyperlink.register_scheme()` for more info.
    """
    return self._scheme

1082

@property
def host(self):
    # type: () -> Text
    """The URL's host, as text.

    The host is the second standard part of an absolute URL. When
    present, a valid host must be a domain name or an IP (v4 or v6)
    address. It occurs before the first slash, or before the second
    colon if a :attr:`~hyperlink.URL.port` is provided.
    """
    return self._host

1092

@property
def port(self):
    # type: () -> Optional[int]
    """The integer port commonly used when connecting to :attr:`host`.

    A port almost never appears without a host. When the original URL
    does not specify one, this attribute falls back to the scheme's
    default port; if that default is unknown as well, it is ``None``.

    >>> URL.from_text(u'http://example.com/pa/th').port
    80
    >>> URL.from_text(u'foo://example.com/pa/th').port
    >>> URL.from_text(u'foo://example.com:8042/pa/th').port
    8042

    .. note::

       Per the standard, when the port is the same as the scheme's
       default port, it will be omitted in the text URL.
    """
    return self._port

1116

@property
def path(self):
    # type: () -> Sequence[Text]
    """The hierarchical path as a tuple of text segments.

    The segments are obtained by splitting the path on slashes. The path
    starts at the first slash after the host and is terminated by a
    "?", which marks the start of the :attr:`~hyperlink.URL.query`
    string.
    """
    return self._path

1126

@property
def query(self):
    # type: () -> QueryPairs
    """The query string as a tuple of ``(key, value)`` pairs.

    The pairs come from splitting the ampersand-separated mapping of
    keys and optional values; they carry the non-hierarchical data used
    to identify the resource. Keys are always strings. Values are
    strings when present, or ``None`` when missing.

    For more operations on the mapping, see
    :meth:`~hyperlink.URL.get()`, :meth:`~hyperlink.URL.add()`,
    :meth:`~hyperlink.URL.set()`, and
    :meth:`~hyperlink.URL.delete()`.
    """
    return self._query

1142

@property
def fragment(self):
    # type: () -> Text
    """The fragment, the last part of the URL, as text.

    It is introduced by the first "#" after the
    :attr:`~hyperlink.URL.path` or :attr:`~hyperlink.URL.query`, and
    enables indirect identification of a secondary resource, like an
    anchor within an HTML page.
    """
    return self._fragment

1152

@property
def rooted(self):
    # type: () -> bool
    """Whether the path starts with a forward slash (``/``).

    The name follows the terminology of the BNF grammar (specifically
    the "path-rootless" rule), because "absolute path" and "absolute
    URI" are somewhat ambiguous. :attr:`path` itself does not contain the
    implicit leading ``"/"``, since that is somewhat awkward to work
    with.
    """
    return self._rooted

1165

@property
def userinfo(self):
    # type: () -> Text
    """The username, or the colon-separated ``username:password`` pair,
    as a single text value.
    """
    return self._userinfo

1173

@property
def uses_netloc(self):
    # type: () -> Optional[bool]
    """
    Whether ``://`` (the "netloc separator") will appear between the
    scheme and the *path* in cases where no host is present.
    """
    return self._uses_netloc

1182

@property
def user(self):
    # type: () -> Text
    """
    The user portion of :attr:`~hyperlink.URL.userinfo`, i.e. everything
    before the first colon.
    """
    username, _sep, _password = self.userinfo.partition(u":")
    return username

1190

def authority(self, with_password=False, **kw):
    # type: (bool, Any) -> Text
    """Build the ``userinfo@host:port`` portion of the URL.

    >>> url = URL.from_text(u'http://user:pass@localhost:8080/a/b?x=y')
    >>> url.authority()
    u'user:@localhost:8080'
    >>> url.authority(with_password=True)
    u'user:pass@localhost:8080'

    Args:
        with_password: Whether the returned text should include the
            password, if one is set. Defaults to False.

    Returns:
        Text: The authority (network location and user information)
        portion of the URL.

    Raises:
        TypeError: If any keyword other than the Twisted-compatible
            ``includeSecrets`` is passed.
    """
    # first, a bit of twisted compat
    with_password = kw.pop("includeSecrets", with_password)
    if kw:
        raise TypeError("got unexpected keyword arguments: %r" % kw.keys())

    host = self.host
    # A colon in the host is wrapped in brackets so it cannot be confused
    # with the host/port separator.
    netloc = "[" + host + "]" if ":" in host else host
    # The port is only spelled out when it differs from the scheme default.
    if self.port != SCHEME_PORT_MAP.get(self.scheme):
        netloc = u":".join([netloc, Text(self.port)])

    credentials = self.userinfo
    if not credentials:
        return netloc
    if not with_password and u":" in credentials:
        # Keep the user name and the colon, drop whatever follows it.
        credentials = credentials[: credentials.index(u":") + 1]
    return u"@".join([credentials, netloc])

1229

def __eq__(self, other):
    # type: (Any) -> bool
    """Compare two URLs component by component.

    Returns ``NotImplemented`` unless *other* is an instance of this
    URL's class. Paths compare equal when they are identical or when
    both are members of ``_ROOT_PATHS``.
    """
    if not isinstance(other, self.__class__):
        return NotImplemented
    compared = (
        "scheme",
        "userinfo",
        "host",
        "query",
        "fragment",
        "port",
        "uses_netloc",
        "rooted",
    )
    if any(getattr(self, attr) != getattr(other, attr) for attr in compared):
        return False
    return self.path == other.path or (
        self.path in _ROOT_PATHS and other.path in _ROOT_PATHS
    )

1251

1252 def __ne__(self, other):

1253 # type: (Any) -> bool

1254 if not isinstance(other, self.__class__):

1255 return NotImplemented

1256 return not self.__eq__(other)

1257

def __hash__(self):
    # type: () -> int
    """Hash the URL consistently with :meth:`__eq__`.

    ``__eq__`` considers two URLs equal when both paths are members of
    ``_ROOT_PATHS`` even if the paths themselves differ, so every such
    path is hashed as the same canonical value. Without this, two URLs
    that compare equal could land in different dict/set buckets.
    """
    path = self.path
    if path in _ROOT_PATHS:
        # Any constant works here; all root-path spellings must agree.
        path = ()
    return hash(
        (
            self.__class__,
            self.scheme,
            self.userinfo,
            self.host,
            path,
            self.query,
            self.fragment,
            self.port,
            self.rooted,
            self.uses_netloc,
        )
    )

1274

@property
def absolute(self):
    # type: () -> bool
    """Whether or not the URL is "absolute".

    Absolute URLs are complete enough to resolve to a network resource
    without being relative to a base URI.

    >>> URL.from_text(u'http://wikipedia.org/').absolute
    True
    >>> URL.from_text(u'?a=b&c=d').absolute
    False

    Absolute URLs must have both a scheme and a host set.
    """
    if not self.scheme:
        return False
    return bool(self.host)

1290

def replace(
    self,
    scheme=_UNSET,  # type: Optional[Text]
    host=_UNSET,  # type: Optional[Text]
    path=_UNSET,  # type: Iterable[Text]
    query=_UNSET,  # type: QueryParameters
    fragment=_UNSET,  # type: Text
    port=_UNSET,  # type: Optional[int]
    rooted=_UNSET,  # type: Optional[bool]
    userinfo=_UNSET,  # type: Text
    uses_netloc=_UNSET,  # type: Optional[bool]
):
    # type: (...) -> URL
    """Return a copy of this URL with the given parts swapped out.

    :class:`URL` objects are immutable: attributes are designed to be
    set only once, at construction. Instead of modifying an existing
    URL, one creates a copy with the desired changes. Any argument that
    is omitted keeps the value of the current URL.

    Args:
        scheme: The text name of the scheme.
        host: The host portion of the network location.
        path: A tuple of strings representing the slash-separated parts of
            the path.
        query: The query parameters, as a dictionary or as a sequence of
            key-value pairs.
        fragment: The fragment part of the URL.
        port: The port part of the network location.
        rooted: Whether or not the path begins with a slash.
        userinfo: The username or colon-separated username:password pair.
        uses_netloc: Indicates whether ``://`` (the "netloc separator")
            will appear to separate the scheme from the *path* in cases
            where no host is present.
            Setting this to ``True`` is a non-spec-compliant affordance for
            the common practice of having URIs that are *not* URLs (cannot
            have a 'host' part) but nevertheless use the common ``://``
            idiom that most people associate with URLs; e.g. ``message:``
            URIs like ``message://message-id`` being equivalent to
            ``message:message-id``.
            This may be inferred based on the scheme depending on whether
            :func:`register_scheme` has been used to register the scheme
            and should not be passed directly unless you know the scheme
            works like this and you know it has not been registered.

    Returns:
        URL: A copy of the current :class:`URL`, with new values for
        parameters passed.
    """
    scheme_changed = scheme is not _UNSET and scheme != self.scheme
    if scheme_changed:
        # when changing schemes, reset the explicit uses_netloc preference
        # to honor the new scheme.
        uses_netloc = None

    new_parts = {
        "scheme": _optional(scheme, self.scheme),
        "host": _optional(host, self.host),
        "path": _optional(path, self.path),
        "query": _optional(query, self.query),
        "fragment": _optional(fragment, self.fragment),
        "port": _optional(port, self.port),
        "rooted": _optional(rooted, self.rooted),
        "userinfo": _optional(userinfo, self.userinfo),
        "uses_netloc": _optional(uses_netloc, self.uses_netloc),
    }
    return self.__class__(**new_parts)

1356

@classmethod
def from_text(cls, text):
    # type: (Text) -> URL
    """Parse a whole URL from its string form.

    Whereas the :class:`URL` constructor is useful for constructing
    URLs from parts, :meth:`~URL.from_text` supports parsing whole
    URLs from their string form::

       >>> URL.from_text(u'http://example.com')
       URL.from_text(u'http://example.com')
       >>> URL.from_text(u'?a=b&x=y')
       URL.from_text(u'?a=b&x=y')

    As you can see above, it's also used as the :func:`repr` of
    :class:`URL` objects. The natural counterpart to
    :func:`~URL.to_text()`. This method only accepts *text*, so be
    sure to decode those bytestrings.

    Args:
        text: A valid URL string.

    Returns:
        URL: The structured object version of the parsed string.

    Raises:
        URLParseError: If the text, its authority, its host, or its port
            cannot be parsed.

    .. note::

        Somewhat unexpectedly, URLs are a far more permissive
        format than most would assume. Many strings which don't
        look like URLs are still valid URLs. As a result, this
        method only raises :class:`URLParseError` on invalid port
        and IPv6 values in the host portion of the URL.
    """
    url_match = _URL_RE.match(_textcheck("text", text))
    if url_match is None:
        raise URLParseError("could not parse url: %r" % text)
    gs = url_match.groupdict()

    # Split the authority into userinfo / host / port.
    au_text = gs["authority"] or u""
    authority_match = _AUTHORITY_RE.match(au_text)
    if authority_match is None:
        raise URLParseError(
            "invalid authority %r in url: %r" % (au_text, text)
        )
    au_gs = authority_match.groupdict()
    if au_gs["bad_host"]:
        raise URLParseError(
            "invalid host %r in url: %r" % (au_gs["bad_host"], text)
        )

    userinfo = au_gs["userinfo"] or u""
    host = au_gs["ipv6_host"] or au_gs["plain_host"]

    port = au_gs["port"]
    if port is not None:
        try:
            port = int(port)  # type: ignore[assignment]  # FIXME, see below
        except ValueError:
            if not port:  # TODO: excessive?
                raise URLParseError("port must not be empty: %r" % au_text)
            raise URLParseError("expected integer for port, not %r" % port)

    scheme = gs["scheme"] or u""
    fragment = gs["fragment"] or u""
    uses_netloc = bool(gs["_netloc_sep"])

    raw_path = gs["path"]
    if raw_path:
        path = tuple(raw_path.split(u"/"))
        # A leading empty segment means the path text began with "/".
        rooted = not path[0]
        if rooted:
            path = path[1:]
    else:
        path = ()
        rooted = bool(au_text)

    if gs["query"]:
        pairs = []
        for qe in gs["query"].split(u"&"):
            # A parameter without "=" has no value at all (None).
            pairs.append(
                qe.split(u"=", 1) if u"=" in qe else (qe, None)
            )
        query = tuple(pairs)  # type: ignore[arg-type]
    else:
        query = ()

    return cls(
        scheme,
        host,
        path,
        query,
        fragment,
        port,  # type: ignore[arg-type]  # FIXME, see above
        rooted,
        userinfo,
        uses_netloc,
    )

1453

def normalize(
    self,
    scheme=True,
    host=True,
    path=True,
    query=True,
    fragment=True,
    userinfo=True,
    percents=True,
):
    # type: (bool, bool, bool, bool, bool, bool, bool) -> URL
    """Return a new URL object with several standard normalizations
    applied:

    * Decode unreserved characters (`RFC 3986 2.3`_)
    * Uppercase remaining percent-encoded octets (`RFC 3986 2.1`_)
    * Convert scheme and host casing to lowercase (`RFC 3986 3.2.2`_)
    * Resolve any "." and ".." references in the path (`RFC 3986 6.2.2.3`_)
    * Ensure an ending slash on URLs with an empty path (`RFC 3986 6.2.3`_)
    * Encode any stray percent signs (`%`) in percent-encoded
      fields (path, query, fragment, userinfo) (`RFC 3986 2.4`_)

    All are applied by default, but normalizations can be disabled
    per-part by passing `False` for that part's corresponding
    name.

    Args:
        scheme: Convert the scheme to lowercase
        host: Convert the host to lowercase
        path: Normalize the path (see above for details)
        query: Normalize the query string
        fragment: Normalize the fragment
        userinfo: Normalize the userinfo
        percents: Encode isolated percent signs for any percent-encoded
            fields which are being normalized (defaults to `True`).

    >>> url = URL.from_text(u'Http://example.COM/a/../b/./c%2f?%61%')
    >>> print(url.normalize().to_text())
    http://example.com/b/c%2F?a%25

    .. _RFC 3986 3.2.2: https://tools.ietf.org/html/rfc3986#section-3.2.2
    .. _RFC 3986 2.3: https://tools.ietf.org/html/rfc3986#section-2.3
    .. _RFC 3986 2.1: https://tools.ietf.org/html/rfc3986#section-2.1
    .. _RFC 3986 6.2.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.2.3
    .. _RFC 3986 6.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.3
    .. _RFC 3986 2.4: https://tools.ietf.org/html/rfc3986#section-2.4
    """  # noqa: E501

    def _clean(target):
        # type: (Text) -> Text
        # One shared policy for every percent-encoded field.
        return _decode_unreserved(
            target, normalize_case=True, encode_stray_percents=percents
        )

    # Only the parts selected for normalization are handed to replace().
    changes = {}  # type: Dict[str, Any]
    if scheme:
        changes["scheme"] = self.scheme.lower()
    if host:
        changes["host"] = self.host.lower()
    if path:
        if not self.path:
            changes["path"] = (u"",)
        else:
            changes["path"] = [
                _clean(segment)
                for segment in _resolve_dot_segments(self.path)
            ]
    if query:
        normalized_query = []
        for k, v in self.query:
            # Missing (None) and empty values are passed through untouched.
            normalized_query.append((_clean(k), _clean(v) if v else v))
        changes["query"] = normalized_query
    if fragment:
        changes["fragment"] = _clean(self.fragment)
    if userinfo:
        userinfo_parts = self.userinfo.split(":", 1)
        changes["userinfo"] = u":".join(
            [_clean(part) for part in userinfo_parts]
        )

    return self.replace(**changes)

1533

def child(self, *segments):
    # type: (Text) -> URL
    """Make a new :class:`URL` where the given path segments are a child
    of this URL, preserving other parts of the URL, including the
    query string and fragment.

    For example::

        >>> url = URL.from_text(u'http://localhost/a/b?x=y')
        >>> child_url = url.child(u"c", u"d")
        >>> child_url.to_text()
        u'http://localhost/a/b/c/d?x=y'

    Args:
        segments: Additional parts to be joined and added to the path, like
            :func:`os.path.join`. Special characters in segments will be
            percent encoded.

    Returns:
        URL: A copy of the current URL with the extra path segments. When
        no segments are given, this URL itself is returned.
    """
    if not segments:
        return self

    checked = [_textcheck("path segment", seg) for seg in segments]

    base = tuple(self.path)
    if self.path and self.path[-1] == u"":
        # A trailing empty segment (trailing slash) is replaced, not kept.
        base = base[:-1]
    extra = tuple(_encode_path_parts(checked, maximal=False))
    return self.replace(path=base + extra)

1566

def sibling(self, segment):
    # type: (Text) -> URL
    """Make a new :class:`URL` with a single path segment that is a
    sibling of this URL path.

    Args:
        segment: A single path segment.

    Returns:
        URL: A copy of the current URL with the last path segment
        replaced by *segment*. Special characters such as
        ``/?#`` will be percent encoded.
    """
    _textcheck("path segment", segment)
    parent = tuple(self.path)[:-1]
    encoded = _encode_path_part(segment)
    return self.replace(path=parent + (encoded,))

1583

def click(self, href=u""):
    # type: (Union[Text, URL]) -> URL
    """Resolve the given URL relative to this URL.

    The resulting URI should match what a web browser would
    generate if you visited the current URL and clicked on *href*.

       >>> url = URL.from_text(u'http://blog.hatnote.com/')
       >>> url.click(u'/post/155074058790').to_text()
       u'http://blog.hatnote.com/post/155074058790'
       >>> url = URL.from_text(u'http://localhost/a/b/c/')
       >>> url.click(u'../d/./e').to_text()
       u'http://localhost/a/b/d/e'

    Args (Text):
        href: A string representing a clicked URL.

    Return:
        A copy of the current URL with navigation logic applied.

    Raises:
        NotImplementedError: If the reference has a scheme but a
            rootless path.

    For more information, see `RFC 3986 section 5`_.

    .. _RFC 3986 section 5: https://tools.ietf.org/html/rfc3986#section-5
    """
    if not href:
        # An empty reference resolves against this URL itself.
        clicked = self
    else:
        if isinstance(href, URL):
            clicked = href
        else:
            # TODO: This error message is not completely accurate,
            # as URL objects are now also valid, but Twisted's
            # test suite (wrongly) relies on this exact message.
            _textcheck("relative URL", href)
            clicked = URL.from_text(href)
        if clicked.absolute:
            return clicked

    query = clicked.query
    if clicked.scheme and not clicked.rooted:
        # Schemes with relative paths are not well-defined.  RFC 3986 calls
        # them a "loophole in prior specifications" that should be avoided,
        # or supported only for backwards compatibility.
        raise NotImplementedError(
            "absolute URI with rootless path: %r" % (href,)
        )

    if clicked.rooted:
        path = clicked.path
    elif clicked.path:
        # Relative path: replace the last segment of the base path.
        path = tuple(self.path)[:-1] + tuple(clicked.path)
    else:
        path = self.path
        if not query:
            query = self.query

    # NOTE(review): for a reference that carries a host but no port,
    # ``clicked.port or self.port`` falls back to the base URL's port --
    # confirm this is intended for network-path references.
    return self.replace(
        scheme=clicked.scheme or self.scheme,
        host=clicked.host or self.host,
        port=clicked.port or self.port,
        path=_resolve_dot_segments(path),
        query=query,
        fragment=clicked.fragment,
    )

1647

def to_uri(self):
    # type: () -> URL
    u"""Make a new :class:`URL` instance with all non-ASCII characters
    appropriately percent-encoded. This is useful to do in preparation
    for sending a :class:`URL` over a network protocol.

    For example::

        >>> URL.from_text(u'https://ايران.com/foo⇧bar/').to_uri()
        URL.from_text(u'https://xn--mgba3a4fra.com/foo%E2%87%A7bar/')

    Returns:
        URL: A new instance with its path segments, query parameters, and
        hostname encoded, so that they are all in the standard
        US-ASCII range.
    """
    userinfo_parts = self.userinfo.split(":", 1)
    encoded_userinfo = u":".join(
        [_encode_userinfo_part(part) for part in userinfo_parts]
    )
    encoded_path = _encode_path_parts(
        self.path, has_scheme=bool(self.scheme), rooted=False, maximal=True
    )
    if self.host:
        encoded_host = idna_encode(self.host, uts46=True).decode("ascii")
    else:
        # An empty host is passed through without IDNA encoding.
        encoded_host = self.host

    encoded_query = []
    for k, v in self.query:
        encoded_key = _encode_query_key(k, maximal=True)
        encoded_value = (
            _encode_query_value(v, maximal=True) if v is not None else None
        )
        encoded_query.append((encoded_key, encoded_value))

    return self.replace(
        userinfo=encoded_userinfo,
        host=encoded_host,
        path=encoded_path,
        query=tuple(encoded_query),
        fragment=_encode_fragment_part(self.fragment, maximal=True),
    )

1692

def to_iri(self):
    # type: () -> URL
    u"""Make a new :class:`URL` instance with all but a few reserved
    characters decoded into human-readable format.

    Percent-encoded Unicode and IDNA-encoded hostnames are
    decoded, like so::

        >>> url = URL.from_text(u'https://xn--mgba3a4fra.example.com/foo%E2%87%A7bar/')
        >>> print(url.to_iri().to_text())
        https://ايران.example.com/foo⇧bar/

    .. note::

        As a general Python issue, "narrow" (UCS-2) builds of
        Python may not be able to fully decode certain URLs, and
        in those cases, this method will return a best-effort,
        partially-decoded, URL which is still valid. This issue
        does not affect any Python builds 3.4+.

    Returns:
        URL: A new instance with its path segments, query parameters, and
        hostname decoded for display purposes.
    """  # noqa: E501
    userinfo_parts = self.userinfo.split(":", 1)
    decoded_userinfo = u":".join(
        [_decode_userinfo_part(part) for part in userinfo_parts]
    )
    decoded_host = _decode_host(self.host)
    decoded_path = [_decode_path_part(segment) for segment in self.path]

    decoded_query = []
    for k, v in self.query:
        decoded_key = _decode_query_key(k)
        # Value-less parameters (None) stay value-less.
        decoded_value = _decode_query_value(v) if v is not None else None
        decoded_query.append((decoded_key, decoded_value))

    return self.replace(
        userinfo=decoded_userinfo,
        host=decoded_host,
        path=decoded_path,
        query=tuple(decoded_query),
        fragment=_decode_fragment_part(self.fragment),
    )

1735

def to_text(self, with_password=False):
    # type: (bool) -> Text
    """Render this URL to its textual representation.

    By default, the URL text will *not* include a password, if one
    is set. RFC 3986 considers using URLs to represent such
    sensitive information as deprecated. Quoting from RFC 3986,
    `section 3.2.1`:

        "Applications should not render as clear text any data after the
        first colon (":") character found within a userinfo subcomponent
        unless the data after the colon is the empty string (indicating no
        password)."

    Args (bool):
        with_password: Whether or not to include the password in the URL
            text. Defaults to False.

    Returns:
        Text: The serialized textual representation of this URL, such as
        ``u"http://example.com/some/path?some=query"``.

    The natural counterpart to :class:`URL.from_text()`.

    .. _section 3.2.1: https://tools.ietf.org/html/rfc3986#section-3.2.1
    """
    scheme = self.scheme
    authority = self.authority(with_password)
    path = "/".join(
        _encode_path_parts(
            self.path,
            rooted=self.rooted,
            has_scheme=bool(scheme),
            has_authority=bool(authority),
            maximal=False,
        )
    )

    rendered_pairs = []
    for k, v in self.query:
        rendered = _encode_query_key(k, maximal=False)
        if v is not None:
            # Only parameters that carry a value get an "=".
            rendered = u"=".join(
                (rendered, _encode_query_value(v, maximal=False))
            )
        rendered_pairs.append(rendered)
    query_string = u"&".join(rendered_pairs)

    fragment = self.fragment

    pieces = []  # type: List[Text]
    if scheme:
        pieces.extend((scheme, ":"))
    if authority:
        pieces.extend(("//", authority))
    elif scheme and path[:2] != "//" and self.uses_netloc:
        # No authority, but this URL still uses the "://" separator.
        pieces.append("//")
    if path:
        if scheme and authority and path[:1] != "/":
            pieces.append("/")  # relpaths with abs authorities auto get '/'
        pieces.append(path)
    if query_string:
        pieces.extend(("?", query_string))
    if fragment:
        pieces.extend(("#", fragment))
    return u"".join(pieces)

1811

1812 def __repr__(self):

1813 # type: () -> str

1814 """Convert this URL to an representation that shows all of its

1815 constituent parts, as well as being a valid argument to

1816 :func:`eval`.

1817 """

1818 return "%s.from_text(%r)" % (self.__class__.__name__, self.to_text())

1819

1820 def _to_bytes(self):

1821 # type: () -> bytes

1822 """

1823 Allows for direct usage of URL objects with libraries like

1824 requests, which automatically stringify URL parameters. See

1825 issue #49.

1826 """

1827 return self.to_uri().to_text().encode("ascii")

1828

# Bind the stringification hooks appropriate for the running interpreter:
# the text-returning hook gets ``to_text`` and the bytes-returning hook
# gets ``_to_bytes``.
if not PY2:
    __bytes__ = _to_bytes
    __str__ = to_text
else:
    __str__ = _to_bytes
    __unicode__ = to_text

# # Begin Twisted Compat Code
# camelCase aliases of the snake_case conversion methods.
asURI = to_uri
asIRI = to_iri

1839

@classmethod
def fromText(cls, s):
    # type: (Text) -> URL
    """Twisted-compatible alias that delegates to :meth:`from_text`."""
    parsed = cls.from_text(s)
    return parsed

1844

def asText(self, includeSecrets=False):
    # type: (bool) -> Text
    """Twisted-compatible alias of :meth:`to_text`; *includeSecrets* is
    passed through as ``with_password``.
    """
    rendered = self.to_text(with_password=includeSecrets)
    return rendered

1848

1849 def __dir__(self):

1850 # type: () -> Sequence[Text]

1851 try:

1852 ret = object.__dir__(self)

1853 except AttributeError:

1854 # object.__dir__ == AttributeError # pdw for py2

1855 ret = dir(self.__class__) + list(self.__dict__.keys())

1856 ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))

1857 return ret

1858

1859 # # End Twisted Compat Code

1860

def add(self, name, value=None):
    # type: (Text, Optional[Text]) -> URL
    """Make a new :class:`URL` instance with a given query argument,
    *name*, added to it with the value *value*, like so::

        >>> URL.from_text(u'https://example.com/?x=y').add(u'x')
        URL.from_text(u'https://example.com/?x=y&x')
        >>> URL.from_text(u'https://example.com/?x=y').add(u'x', u'z')
        URL.from_text(u'https://example.com/?x=y&x=z')

    Args:
        name: The name of the query parameter to add.
            The part before the ``=``.
        value: The value of the query parameter to add.
            The part after the ``=``.
            Defaults to ``None``, meaning no value.

    Returns:
        URL: A new :class:`URL` instance with the parameter added.
    """
    # The new pair always goes at the end; existing pairs are untouched.
    appended = ((name, value),)
    return self.replace(query=self.query + appended)

1882

def set(self, name, value=None):
    # type: (Text, Optional[Text]) -> URL
    """Make a new :class:`URL` instance with the query parameter *name*
    set to *value*. All existing occurrences, if any, are replaced
    by the single name-value pair.

        >>> URL.from_text(u'https://example.com/?x=y').set(u'x')
        URL.from_text(u'https://example.com/?x')
        >>> URL.from_text(u'https://example.com/?x=y').set(u'x', u'z')
        URL.from_text(u'https://example.com/?x=z')

    The new pair takes the position of the first existing occurrence of
    *name*. If *name* is not present yet, the pair is appended at the
    end of the query.

    Args:
        name: The name of the query parameter to set.
            The part before the ``=``.
        value: The value of the query parameter to set.
            The part after the ``=``.
            Defaults to ``None``, meaning no value.

    Returns:
        URL: A new :class:`URL` instance with the parameter set.
    """
    current = self.query
    kept = [(k, v) for (k, v) in current if k != name]
    # Preserve the original position of the query key in the list. Every
    # pair before the first occurrence is kept, so its index is the same in
    # ``kept``. When the key is new, fall back to ``len(kept)`` (append):
    # a fallback of -1 would insert the pair *before* the last element.
    position = next(
        (i for (i, (k, _v)) in enumerate(current) if k == name), len(kept)
    )
    kept.insert(position, (name, value))
    return self.replace(query=kept)

1911

def get(self, name):
    # type: (Text) -> List[Optional[Text]]
    """Get a list of values for the given query parameter, *name*::

        >>> url = URL.from_text(u'?x=1&x=2')
        >>> url.get('x')
        [u'1', u'2']
        >>> url.get('y')
        []

    If the given *name* is not set, an empty list is returned. A
    list is always returned, and this method raises no exceptions.

    Args:
        name: The name of the query parameter to get.

    Returns:
        List[Optional[Text]]: A list of all the values associated with the
        key, in string form.
    """
    matches = []  # type: List[Optional[Text]]
    for key, value in self.query:
        if name == key:
            matches.append(value)
    return matches

1933

def remove(
    self,
    name,  # type: Text
    value=_UNSET,  # type: Text
    limit=None,  # type: Optional[int]
):
    # type: (...) -> URL
    """Make a new :class:`URL` instance with occurrences of the query
    parameter *name* removed, or, if *value* is set, parameters
    matching *name* and *value*. No exception is raised if the
    parameter is not already set.

    Args:
        name: The name of the query parameter to remove.
        value: Optional value to additionally filter on.
            Setting this removes query parameters which match both name
            and value.
        limit: Optional maximum number of parameters to remove.

    Returns:
        URL: A new :class:`URL` instance with the parameter removed.
    """
    kept = []
    if limit is None:
        if value is _UNSET:
            # Drop every pair carrying this name.
            for k, v in self.query:
                if k != name:
                    kept.append((k, v))
        else:
            # Drop only pairs that match both name and value.
            for k, v in self.query:
                if k == name and v == value:
                    continue
                kept.append((k, v))
    else:
        dropped = 0
        for k, v in self.query:
            is_match = k == name and (value is _UNSET or v == value)
            if is_match and dropped < limit:
                dropped += 1  # drop it
            else:
                kept.append((k, v))  # keep it

    return self.replace(query=kept)

1979

1980

EncodedURL = URL  # An alias better describing what the URL really is

# A URL built with no arguments; it is used as the default ``url``
# argument of ``DecodedURL.__init__``.
_EMPTY_URL = URL()

1984

1985

1986def _replace_plus(text):

1987 # type: (Text) -> Text

1988 return text.replace("+", "%20")

1989

1990

1991def _no_op(text):

1992 # type: (Text) -> Text

1993 return text

1994

1995

class DecodedURL(object):
    """
    :class:`DecodedURL` is a type designed to act as a higher-level
    interface to :class:`URL` and the recommended type for most
    operations. By analogy, :class:`DecodedURL` is the
    :class:`unicode` to URL's :class:`bytes`.

    :class:`DecodedURL` automatically handles encoding and decoding
    all its components, such that all inputs and outputs are in a
    maximally-decoded state.  Note that this means, for some special
    cases, a URL may not "roundtrip" character-for-character, but this
    is considered a good tradeoff for the safety of automatic
    encoding.

    Otherwise, :class:`DecodedURL` has almost exactly the same API as
    :class:`URL`.

    Where applicable, a UTF-8 encoding is presumed. Be advised that
    some interactions can raise :exc:`UnicodeEncodeErrors` and
    :exc:`UnicodeDecodeErrors`, just like when working with
    bytestrings. Examples of such interactions include handling query
    strings encoding binary data, and paths containing segments with
    special characters encoded with codecs other than UTF-8.

    Args:
        url: A :class:`URL` object to wrap.
        lazy: Set to True to avoid pre-decoding all parts of the URL to
            check for validity.
            Defaults to False.
        query_plus_is_space: + characters in the query string should be
            treated as spaces when decoding.  If unspecified, the default
            is taken from the scheme.

    .. note::

        The :class:`DecodedURL` initializer takes a :class:`URL` object,
        not URL components, like :class:`URL`. To programmatically
        construct a :class:`DecodedURL`, you can use this pattern:

          >>> print(DecodedURL().replace(scheme=u'https',
          ... host=u'pypi.org', path=(u'projects', u'hyperlink')).to_text())
          https://pypi.org/projects/hyperlink

    .. versionadded:: 18.0.0
    """

    def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
        # type: (URL, bool, Optional[bool]) -> None
        self._url = url
        if query_plus_is_space is None:
            # No explicit choice was made: derive it from the scheme.
            query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
        self._query_plus_is_space = query_plus_is_space
        if not lazy:
            # Touch every decoding property once. This caches the cached
            # ones and surfaces any decoding error at construction time
            # rather than on first access.
            self.host, self.userinfo, self.path, self.query, self.fragment

    @classmethod
    def from_text(cls, text, lazy=False, query_plus_is_space=None):
        # type: (Text, bool, Optional[bool]) -> DecodedURL
        """Make a `DecodedURL` instance from any text string containing a URL.

        Args:
            text: Text containing the URL
            lazy: Set to True to skip pre-decoding all parts of the URL
                (and therefore skip the up-front validity check).
                Defaults to False.
            query_plus_is_space: Whether ``+`` in the query string is
                treated as a space when decoding.  If left as ``None``,
                the default is taken from the scheme.
        """
        _url = URL.from_text(text)
        return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)

    @property
    def encoded_url(self):
        # type: () -> URL
        """Access the underlying :class:`URL` object, which has any special
        characters encoded.
        """
        return self._url

    def to_text(self, with_password=False):
        # type: (bool) -> Text
        "Passthrough to :meth:`~hyperlink.URL.to_text()`"
        return self._url.to_text(with_password)

    def to_uri(self):
        # type: () -> URL
        "Passthrough to :meth:`~hyperlink.URL.to_uri()`"
        return self._url.to_uri()

    def to_iri(self):
        # type: () -> URL
        "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
        return self._url.to_iri()

    def _clone(self, url):
        # type: (URL) -> DecodedURL
        """Wrap *url* in a new instance of this class, carrying over the
        ``query_plus_is_space`` setting of the current instance.
        """
        return self.__class__(
            url,
            # TODO: propagate laziness?
            query_plus_is_space=self._query_plus_is_space,
        )

    def click(self, href=u""):
        # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
        """Return a new DecodedURL wrapping the result of
        :meth:`~hyperlink.URL.click()`
        """
        if isinstance(href, DecodedURL):
            # The wrapped URL's click() is handed the encoded form.
            href = href._url
        return self._clone(
            self._url.click(href=href),
        )

    def sibling(self, segment):
        # type: (Text) -> DecodedURL
        """Automatically encode any reserved characters in *segment* and
        return a new `DecodedURL` wrapping the result of
        :meth:`~hyperlink.URL.sibling()`
        """
        return self._clone(
            self._url.sibling(_encode_reserved(segment)),
        )

    def child(self, *segments):
        # type: (Text) -> DecodedURL
        """Automatically encode any reserved characters in *segments* and
        return a new `DecodedURL` wrapping the result of
        :meth:`~hyperlink.URL.child()`.

        Called with no segments, the instance itself is returned.
        """
        if not segments:
            return self
        new_segs = [_encode_reserved(s) for s in segments]
        return self._clone(self._url.child(*new_segs))

    def normalize(
        self,
        scheme=True,
        host=True,
        path=True,
        query=True,
        fragment=True,
        userinfo=True,
        percents=True,
    ):
        # type: (bool, bool, bool, bool, bool, bool, bool) -> DecodedURL
        """Return a new `DecodedURL` wrapping the result of
        :meth:`~hyperlink.URL.normalize()`
        """
        return self._clone(
            self._url.normalize(
                scheme, host, path, query, fragment, userinfo, percents
            )
        )

    @property
    def absolute(self):
        # type: () -> bool
        "Passthrough to the wrapped URL's ``absolute`` attribute."
        return self._url.absolute

    @property
    def scheme(self):
        # type: () -> Text
        "Passthrough to the wrapped URL's ``scheme`` attribute."
        return self._url.scheme

    @property
    def host(self):
        # type: () -> Text
        "The wrapped URL's host, passed through ``_decode_host``."
        return _decode_host(self._url.host)

    @property
    def port(self):
        # type: () -> Optional[int]
        "Passthrough to the wrapped URL's ``port`` attribute."
        return self._url.port

    @property
    def rooted(self):
        # type: () -> bool
        "Passthrough to the wrapped URL's ``rooted`` attribute."
        return self._url.rooted

    @property
    def path(self):
        # type: () -> Sequence[Text]
        "Tuple of percent-decoded path segments (computed once, then cached)."
        if not hasattr(self, "_path"):
            self._path = tuple(
                [
                    _percent_decode(p, raise_subencoding_exc=True)
                    for p in self._url.path
                ]
            )
        return self._path

    @property
    def query(self):
        # type: () -> QueryPairs
        """Tuple of percent-decoded ``(name, value)`` pairs (computed once,
        then cached).  ``None`` values are preserved as ``None``.
        """
        if not hasattr(self, "_query"):
            # Optionally turn "+" into "%20" first so that it decodes to
            # a space along with the rest of the percent-escapes.
            if self._query_plus_is_space:
                predecode = _replace_plus
            else:
                predecode = _no_op

            self._query = cast(
                QueryPairs,
                tuple(
                    tuple(
                        _percent_decode(
                            predecode(x), raise_subencoding_exc=True
                        )
                        if x is not None
                        else None
                        for x in (k, v)
                    )
                    for k, v in self._url.query
                ),
            )
        return self._query

    @property
    def fragment(self):
        # type: () -> Text
        "The percent-decoded fragment (computed once, then cached)."
        if not hasattr(self, "_fragment"):
            frag = self._url.fragment
            self._fragment = _percent_decode(frag, raise_subencoding_exc=True)
        return self._fragment

    @property
    def userinfo(self):
        # type: () -> Union[Tuple[str], Tuple[str, str]]
        """The percent-decoded userinfo as a 1- or 2-tuple (computed once,
        then cached).  The encoded userinfo is split on the first ``:``.
        """
        if not hasattr(self, "_userinfo"):
            self._userinfo = cast(
                Union[Tuple[str], Tuple[str, str]],
                tuple(
                    _percent_decode(p, raise_subencoding_exc=True)
                    for p in self._url.userinfo.split(":", 1)
                ),
            )
        return self._userinfo

    @property
    def user(self):
        # type: () -> Text
        "The first element of :attr:`userinfo`."
        return self.userinfo[0]

    @property
    def uses_netloc(self):
        # type: () -> Optional[bool]
        "Passthrough to the wrapped URL's ``uses_netloc`` attribute."
        return self._url.uses_netloc

    def replace(
        self,
        scheme=_UNSET,  # type: Optional[Text]
        host=_UNSET,  # type: Optional[Text]
        path=_UNSET,  # type: Iterable[Text]
        query=_UNSET,  # type: QueryParameters
        fragment=_UNSET,  # type: Text
        port=_UNSET,  # type: Optional[int]
        rooted=_UNSET,  # type: Optional[bool]
        userinfo=_UNSET,  # type: Union[Tuple[str], Tuple[str, str]]
        uses_netloc=_UNSET,  # type: Optional[bool]
    ):
        # type: (...) -> DecodedURL
        """While the signature is the same, this `replace()` differs a little
        from URL.replace. For instance, it accepts userinfo as a
        tuple, not as a string, handling the case of having a username
        containing a `:`. As with the rest of the methods on
        DecodedURL, if you pass a reserved character, it will be
        automatically encoded instead of an error being raised.

        Raises:
            ValueError: If *userinfo* has more than two elements.
        """
        if path is not _UNSET:
            path = tuple(_encode_reserved(p) for p in path)
        if query is not _UNSET:
            query = cast(
                QueryPairs,
                tuple(
                    tuple(
                        _encode_reserved(x) if x is not None else None
                        for x in (k, v)
                    )
                    for k, v in iter_pairs(query)
                ),
            )
        if userinfo is not _UNSET:
            if len(userinfo) > 2:
                raise ValueError(
                    'userinfo expected sequence of ["user"] or'
                    ' ["user", "password"], got %r' % (userinfo,)
                )
            # Each part is encoded separately before joining, so a ":"
            # inside the user name cannot be mistaken for the separator.
            userinfo_text = u":".join([_encode_reserved(p) for p in userinfo])
        else:
            userinfo_text = _UNSET
        new_url = self._url.replace(
            scheme=scheme,
            host=host,
            path=path,
            query=query,
            fragment=fragment,
            port=port,
            rooted=rooted,
            userinfo=userinfo_text,
            uses_netloc=uses_netloc,
        )
        return self._clone(url=new_url)

    def get(self, name):
        # type: (Text) -> List[Optional[Text]]
        "Get the value of all query parameters whose name matches *name*"
        return [v for (k, v) in self.query if name == k]

    def add(self, name, value=None):
        # type: (Text, Optional[Text]) -> DecodedURL
        """Return a new DecodedURL with the query parameter *name* and *value*
        added."""
        return self.replace(query=self.query + ((name, value),))

    def set(self, name, value=None):
        # type: (Text, Optional[Text]) -> DecodedURL
        """Return a new DecodedURL with query parameter *name* set to *value*.

        All existing occurrences of *name* are replaced by a single pair
        placed where the first occurrence was.  If *name* is not present
        yet, the new pair is appended at the end of the query.
        """
        query = self.query
        q = [(k, v) for (k, v) in query if k != name]
        # Position of the first existing occurrence. No pair before it
        # was filtered out, so the same index is valid in ``q``. When the
        # name is absent, fall back to ``len(q)`` so the pair is appended;
        # a ``-1`` default would make ``q[-1:-1]`` insert *before* the
        # last existing pair.
        idx = next(
            (i for (i, (k, v)) in enumerate(query) if k == name), len(q)
        )
        q[idx:idx] = [(name, value)]
        return self.replace(query=q)

    def remove(
        self,
        name,  # type: Text
        value=_UNSET,  # type: Text
        limit=None,  # type: Optional[int]
    ):
        # type: (...) -> DecodedURL
        """Return a new DecodedURL with query parameter *name* removed.

        Optionally also filter for *value*, as well as cap the number
        of parameters removed with *limit*.
        """
        if limit is None:
            if value is _UNSET:
                nq = [(k, v) for (k, v) in self.query if k != name]
            else:
                nq = [
                    (k, v)
                    for (k, v) in self.query
                    if not (k == name and v == value)
                ]
        else:
            nq, removed_count = [], 0
            for k, v in self.query:
                if (
                    k == name
                    and (value is _UNSET or v == value)
                    and removed_count < limit
                ):
                    removed_count += 1  # drop it
                else:
                    nq.append((k, v))  # keep it

        return self.replace(query=nq)

    def __repr__(self):
        # type: () -> str
        cn = self.__class__.__name__
        return "%s(url=%r)" % (cn, self._url)

    def __str__(self):
        # type: () -> str
        # TODO: the underlying URL's __str__ needs to change to make
        # this work as the URL, see #55
        return str(self._url)

    def __eq__(self, other):
        # type: (Any) -> bool
        """Two instances are equal when their normalized URI forms are equal."""
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.normalize().to_uri() == other.normalize().to_uri()

    def __ne__(self, other):
        # type: (Any) -> bool
        if not isinstance(other, self.__class__):
            return NotImplemented
        return not self.__eq__(other)

    def __hash__(self):
        # type: () -> int
        # Hash the same normalized URI that __eq__ compares, so that
        # objects which compare equal always hash equal. Hashing the raw
        # components instead would break that whenever normalization
        # changes one of them.
        # NOTE(review): relies on URL being hashable consistently with
        # its own __eq__ -- confirm against the URL class.
        return hash((self.__class__, self.normalize().to_uri()))

    # # Begin Twisted Compat Code
    asURI = to_uri
    asIRI = to_iri

    @classmethod
    def fromText(cls, s, lazy=False):
        # type: (Text, bool) -> DecodedURL
        "camelCase alias forwarding to :meth:`from_text`."
        return cls.from_text(s, lazy=lazy)

    def asText(self, includeSecrets=False):
        # type: (bool) -> Text
        "camelCase alias forwarding to :meth:`to_text`."
        return self.to_text(with_password=includeSecrets)

    def __dir__(self):
        # type: () -> Sequence[Text]
        "List attributes, hiding the camelCase compatibility aliases."
        try:
            ret = object.__dir__(self)
        except AttributeError:
            # object.__dir__ == AttributeError  # pdw for py2
            ret = dir(self.__class__) + list(self.__dict__.keys())
        ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))
        return ret

    # # End Twisted Compat Code

2421

2422

# Add some overloads so that parse gives a better return value.
@overload
def parse(url, decoded, lazy=False):
    # type: (Text, Literal[False], bool) -> URL
    """Passing decoded=False returns URL."""


@overload
def parse(url, decoded=True, lazy=False):
    # type: (Text, Literal[True], bool) -> DecodedURL
    """Passing decoded=True (or the default value) returns DecodedURL."""


@overload
def parse(url, decoded=True, lazy=False):
    # type: (Text, bool, bool) -> Union[URL, DecodedURL]
    """If decoded is not a literal we don't know the return type."""


def parse(url, decoded=True, lazy=False):
    # type: (Text, bool, bool) -> Union[URL, DecodedURL]
    """
    Turn a text string into a structured URL object.

        >>> url = parse(u"https://github.com/python-hyper/hyperlink")
        >>> print(url.to_text())
        https://github.com/python-hyper/hyperlink

    Args:
        url: A text string representation of a URL.

        decoded: When true (the default), return a
           :class:`DecodedURL`, which automatically handles all
           encoding/decoding/quoting/unquoting for all the various
           accessors of parts of the URL.  When false, return a
           :class:`URL`, which has the same API but requires handling
           of special characters for different parts of the URL.

        lazy: Only relevant when `decoded=True`: controls whether the
           URL is decoded immediately or as accessed. The default,
           `lazy=False`, checks all encoded parts of the URL for
           decodability.

    .. versionadded:: 18.0.0
    """
    # The text is always parsed into the encoded form first; the decoded
    # wrapper is layered on top only when requested.
    encoded = EncodedURL.from_text(url)
    return DecodedURL(encoded, lazy=lazy) if decoded else encoded

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hyperlink/_url.py: 54%

739 statements