Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/hyperlink/_url.py: 54%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

739 statements  

1# -*- coding: utf-8 -*- 

2u"""Hyperlink provides Pythonic URL parsing, construction, and rendering. 

3 

4Usage is straightforward:: 

5 

6 >>> import hyperlink 

7 >>> url = hyperlink.parse(u'http://github.com/mahmoud/hyperlink?utm_source=docs') 

8 >>> url.host 

9 u'github.com' 

10 >>> secure_url = url.replace(scheme=u'https') 

11 >>> secure_url.get('utm_source')[0] 

12 u'docs' 

13 

14Hyperlink's API centers on the :class:`DecodedURL` type, which wraps 

15the lower-level :class:`URL`, both of which can be returned by the 

16:func:`parse()` convenience function. 

17 

18""" # noqa: E501 

19 

20import re 

21import sys 

22import string 

23import socket 

24from socket import AF_INET, AF_INET6 

25 

26try: 

27 from socket import AddressFamily 

28except ImportError: 

29 AddressFamily = int # type: ignore[assignment,misc] 

30from typing import ( 

31 Any, 

32 Callable, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Sequence, 

40 Text, 

41 Tuple, 

42 Type, 

43 TypeVar, 

44 Union, 

45 cast, 

46 TYPE_CHECKING, 

47 overload, 

48) 

49from unicodedata import normalize 

50from ._socket import inet_pton 

51 

52try: 

53 from collections.abc import Mapping as MappingABC 

54except ImportError: # Python 2 

55 from collections import Mapping as MappingABC 

56 

57from idna import encode as idna_encode, decode as idna_decode 

58 

59 

# True when the interpreter's major version is 2.
PY2 = sys.version_info[0] == 2
try:
    # Python 2 has a builtin ``unichr``; merely naming it is enough to probe.
    unichr
except NameError:  # Py3
    # On Python 3 ``chr`` already returns text for any code point.
    unichr = chr  # type: Callable[[int], Text]
NoneType = type(None)  # type: Type[None]
QueryPairs = Tuple[Tuple[Text, Optional[Text]], ...]  # internal representation
# Everything accepted as query input: a mapping, the internal tuple-of-pairs
# form, or any iterable of (key, value) pairs.  Values may be None.
QueryParameters = Union[
    Mapping[Text, Optional[Text]],
    QueryPairs,
    Iterable[Tuple[Text, Optional[Text]]],
]
# Generic type variable used by the *check helpers that return their input.
T = TypeVar("T")

73# Literal is not available in all pythons so we only bring it in for mypy. 

74if TYPE_CHECKING: 

75 from typing import Literal 

76 

77 

78# from boltons.typeutils 

def make_sentinel(name="_MISSING", var_name=""):
    # type: (str, str) -> object
    """Return a fresh, falsy placeholder object for "no value given".

    Every call defines a brand-new class and returns its only instance,
    which makes the result suitable as a default where ``None`` is itself
    a meaningful argument.

    Args:
        name: Name of the sentinel, shown in its repr when *var_name* is
            not given.
        var_name: Name of the module-level variable the sentinel is
            assigned to. When set, it is used verbatim as the repr and
            is returned from ``__reduce__`` so that the object can be
            pickled by reference to that variable.

    >>> make_sentinel(var_name='_MISSING')
    _MISSING

    .. note::

        By design, additional calls to ``make_sentinel`` with the same
        values will not produce equivalent objects.

        >>> make_sentinel('TEST') == make_sentinel('TEST')
        False
        >>> type(make_sentinel('TEST')) == type(make_sentinel('TEST'))
        False
    """

    class Sentinel(object):
        def __init__(self):
            # type: () -> None
            self.name = name
            self.var_name = var_name

        def __nonzero__(self):
            # type: () -> bool
            return False

        # Python 3 spells the truthiness hook __bool__.
        __bool__ = __nonzero__

        def __repr__(self):
            # type: () -> str
            shown = self.var_name
            if not shown:
                shown = "%s(%r)" % (self.__class__.__name__, self.name)
            return shown

        if var_name:
            # Returning a plain string from __reduce__ tells pickle to look
            # the object up as a global of that name. The base-class type
            # hints do not list ``str`` as a return type even though the
            # pickle documentation permits it.
            def __reduce__(self):
                # type: () -> str
                return self.var_name

    return Sentinel()

136 

137 

# Marker for "argument not supplied"; both names refer to the same falsy
# sentinel object, which _optional() tests for by identity.
_unspecified = _UNSET = make_sentinel("_UNSET")  # type: Any

139 

140 

141# RFC 3986 Section 2.3, Unreserved URI Characters 

142# https://tools.ietf.org/html/rfc3986#section-2.3 

143_UNRESERVED_CHARS = frozenset( 

144 "~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" 

145) 

146 

147 

# URL parsing regex (based on RFC 3986 Appendix B, with modifications)
# Every component is optional, so any string without a line break matches;
# the named groups are None (or empty, for "path") when a part is absent.
_URL_RE = re.compile(
    # scheme: everything before the first ":" that is not a delimiter
    r"^((?P<scheme>[^:/?#]+):)?"
    # "//" introduces the authority; the separator is captured on its own
    r"((?P<_netloc_sep>//)"
    r"(?P<authority>[^/?#]*))?"
    # path: up to the first "?" or "#"
    r"(?P<path>[^?#]*)"
    # query: after "?", up to the first "#"
    r"(\?(?P<query>[^#]*))?"
    # fragment: everything after "#"
    r"(#(?P<fragment>.*))?$"
)

157_SCHEME_RE = re.compile(r"^[a-zA-Z0-9+-.]*$") 

# Splits an authority string into userinfo, host and port.  The host is
# captured three ways: "ipv6_host" for a bracketed literal (brackets not
# included), "plain_host" for text free of ":" and brackets, and
# "bad_host" as a lazy catch-all for anything else.
_AUTHORITY_RE = re.compile(
    # optional userinfo, terminated by "@"
    r"^(?:(?P<userinfo>[^@/?#]*)@)?"
    r"(?P<host>"
    r"(?:\[(?P<ipv6_host>[^[\]/?#]*)\])"
    r"|(?P<plain_host>[^:/?#[\]]*)"
    r"|(?P<bad_host>.*?))?"
    # optional port: everything after the ":" (not restricted to digits here)
    r"(?::(?P<port>.*))?$"
)

166 

167 

# Lookup from a two-character hex pair (as ASCII bytes, in any mix of upper
# and lower case) to the single byte that the pair denotes.
_HEX_CHAR_MAP = {
    (high + low).encode("ascii"): unichr(int(high + low, 16)).encode("charmap")
    for high in string.hexdigits
    for low in string.hexdigits
}

# Matches and captures a run of one or more characters in the ASCII range
# (U+0000 through U+007F).
_ASCII_RE = re.compile("([\x00-\x7f]+)")

176 

# RFC 3986 section 2.2, Reserved Characters
# https://tools.ietf.org/html/rfc3986#section-2.2
_GEN_DELIMS = frozenset(u":/?#[]@")
_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
_ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS

# For each URL component: *_SAFE holds the characters that may appear
# literally, and *_DELIMS holds the reserved characters that may not.
_USERINFO_SAFE = (_UNRESERVED_CHARS | _SUB_DELIMS) | set(u"%")
_USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE

_PATH_SAFE = _USERINFO_SAFE | set(u":@")
_PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE

# Same as a path segment, except that ":" is not allowed.
_SCHEMELESS_PATH_SAFE = _PATH_SAFE - set(":")
_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE

_FRAGMENT_SAFE = (_UNRESERVED_CHARS | _PATH_SAFE) | set(u"/?")
_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE

# "-" binds tighter than "|", so the subtraction applies to the right-hand
# operand only; the parentheses spell out that grouping.
_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | (_FRAGMENT_SAFE - set(u"&"))
_QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE

_QUERY_KEY_SAFE = _UNRESERVED_CHARS | (_QUERY_VALUE_SAFE - set(u"="))
_QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE

195 

196 

def _make_decode_map(delims, allow_percent=False):
    # type: (Iterable[Text], bool) -> Mapping[bytes, bytes]
    """Build a hex-pair -> byte table that leaves *delims* encoded.

    Starts from a copy of ``_HEX_CHAR_MAP`` and removes the upper- and
    lower-case hex pair of every character in *delims*, so that a decoder
    using the result does not decode those characters.

    Args:
        delims: Characters whose percent-encoded form must be preserved.
        allow_percent: When false (the default), ``%`` is added to
            *delims*.

    Returns:
        A new dict; ``_HEX_CHAR_MAP`` itself is not modified.
    """
    decode_map = dict(_HEX_CHAR_MAP)
    protected = delims if allow_percent else set(delims) | set([u"%"])
    for char in protected:
        upper_pair = "{0:02X}".format(ord(char)).encode("ascii")
        lower_pair = upper_pair.lower()
        decode_map.pop(upper_pair)
        # Pairs made only of digits have no separate lower-case spelling.
        if lower_pair != upper_pair:
            decode_map.pop(lower_pair)
    return decode_map

209 

210 

211def _make_quote_map(safe_chars): 

212 # type: (Iterable[Text]) -> Mapping[Union[int, Text], Text] 

213 ret = {} # type: Dict[Union[int, Text], Text] 

214 # v is included in the dict for py3 mostly, because bytestrings 

215 # are iterables of ints, of course! 

216 for i, v in zip(range(256), range(256)): 

217 c = chr(v) 

218 if c in safe_chars: 

219 ret[c] = ret[v] = c 

220 else: 

221 ret[c] = ret[v] = "%{0:02X}".format(i) 

222 return ret 

223 

224 

# Per-component lookup tables, built once at import time.  *_QUOTE_MAP tables
# are used for encoding, *_DECODE_MAP tables for percent-decoding.
_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
_USERINFO_DECODE_MAP = _make_decode_map(_USERINFO_DELIMS)

_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
_SCHEMELESS_PATH_PART_QUOTE_MAP = _make_quote_map(_SCHEMELESS_PATH_SAFE)
_PATH_DECODE_MAP = _make_decode_map(_PATH_DELIMS)

_QUERY_KEY_QUOTE_MAP = _make_quote_map(_QUERY_KEY_SAFE)
_QUERY_KEY_DECODE_MAP = _make_decode_map(_QUERY_KEY_DELIMS)

_QUERY_VALUE_QUOTE_MAP = _make_quote_map(_QUERY_VALUE_SAFE)
# "+" is added explicitly, so an encoded plus sign in a query value is left
# encoded when decoding.
_QUERY_VALUE_DECODE_MAP = _make_decode_map(_QUERY_VALUE_DELIMS | set("+"))

_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)
_FRAGMENT_DECODE_MAP = _make_decode_map(_FRAGMENT_DELIMS)

_UNRESERVED_QUOTE_MAP = _make_quote_map(_UNRESERVED_CHARS)
# Only the hex pairs that stand for unreserved characters.
_UNRESERVED_DECODE_MAP = {
    pair: byte
    for pair, byte in _HEX_CHAR_MAP.items()
    if byte.decode("ascii", "replace") in _UNRESERVED_CHARS
}

# The two path tuples that denote "no path beyond the root".
_ROOT_PATHS = frozenset(((), (u"",)))

246 

247 

def _encode_reserved(text, maximal=True):
    # type: (Text, bool) -> Text
    """Percent-encode everything except RFC 3986 unreserved characters.

    A very comprehensive encoding, used for arguments to DecodedURL,
    where a ``%`` means a literal percent sign rather than the start of
    an escape.

    Args:
        text: The text to encode.
        maximal: When true (the default), *text* is NFC-normalized,
            encoded as UTF-8, and every byte is mapped through
            ``_UNRESERVED_QUOTE_MAP``.

    Returns:
        The encoded text.
    """
    if not maximal:
        # NOTE(review): _UNRESERVED_QUOTE_MAP maps each unreserved character
        # to itself, and only unreserved characters are looked up here, so
        # this branch appears to return the text unchanged -- confirm whether
        # "not in" was intended.
        return u"".join(
            _UNRESERVED_QUOTE_MAP[char] if char in _UNRESERVED_CHARS else char
            for char in text
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join(_UNRESERVED_QUOTE_MAP[byte] for byte in utf8_bytes)

264 

265 

def _encode_path_part(text, maximal=True):
    # type: (Text, bool) -> Text
    """Percent-encode a single segment of a URL path.

    Args:
        text: One path segment, without slashes.
        maximal: When true (the default), *text* is NFC-normalized,
            encoded as UTF-8 and every byte is mapped through
            ``_PATH_PART_QUOTE_MAP``. When false, only the characters in
            ``_PATH_DELIMS`` are encoded; all other text is kept as is.

    Returns:
        The encoded segment.
    """
    if not maximal:
        return u"".join(
            _PATH_PART_QUOTE_MAP[char] if char in _PATH_DELIMS else char
            for char in text
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join(_PATH_PART_QUOTE_MAP[byte] for byte in utf8_bytes)

275 

276 

def _encode_schemeless_path_part(text, maximal=True):
    # type: (Text, bool) -> Text
    """Percent-encode the first path segment of a URL that has no scheme.

    Works like :func:`_encode_path_part`, but with tables that treat
    ``:`` as a delimiter, so a colon in the segment is encoded.

    Args:
        text: The first path segment.
        maximal: When true (the default), *text* is NFC-normalized,
            encoded as UTF-8 and every byte is mapped through
            ``_SCHEMELESS_PATH_PART_QUOTE_MAP``. When false, only the
            characters in ``_SCHEMELESS_PATH_DELIMS`` are encoded.

    Returns:
        The encoded segment.
    """
    if not maximal:
        return u"".join(
            _SCHEMELESS_PATH_PART_QUOTE_MAP[char]
            if char in _SCHEMELESS_PATH_DELIMS
            else char
            for char in text
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join(
        _SCHEMELESS_PATH_PART_QUOTE_MAP[byte] for byte in utf8_bytes
    )

293 

294 

295def _encode_path_parts( 

296 text_parts, # type: Sequence[Text] 

297 rooted=False, # type: bool 

298 has_scheme=True, # type: bool 

299 has_authority=True, # type: bool 

300 maximal=True, # type: bool 

301): 

302 # type: (...) -> Sequence[Text] 

303 """ 

304 Percent-encode a tuple of path parts into a complete path. 

305 

306 Setting *maximal* to False percent-encodes only the reserved 

307 characters that are syntactically necessary for serialization, 

308 preserving any IRI-style textual data. 

309 

310 Leaving *maximal* set to its default True percent-encodes 

311 everything required to convert a portion of an IRI to a portion of 

312 a URI. 

313 

314 RFC 3986 3.3: 

315 

316 If a URI contains an authority component, then the path component 

317 must either be empty or begin with a slash ("/") character. If a URI 

318 does not contain an authority component, then the path cannot begin 

319 with two slash characters ("//"). In addition, a URI reference 

320 (Section 4.1) may be a relative-path reference, in which case the 

321 first path segment cannot contain a colon (":") character. 

322 """ 

323 if not text_parts: 

324 return () 

325 if rooted: 

326 text_parts = (u"",) + tuple(text_parts) 

327 # elif has_authority and text_parts: 

328 # raise Exception('see rfc above') # TODO: too late to fail like this? 

329 encoded_parts = [] # type: List[Text] 

330 if has_scheme: 

331 encoded_parts = [ 

332 _encode_path_part(part, maximal=maximal) if part else part 

333 for part in text_parts 

334 ] 

335 else: 

336 encoded_parts = [_encode_schemeless_path_part(text_parts[0])] 

337 encoded_parts.extend( 

338 [ 

339 _encode_path_part(part, maximal=maximal) if part else part 

340 for part in text_parts[1:] 

341 ] 

342 ) 

343 return tuple(encoded_parts) 

344 

345 

def _encode_query_key(text, maximal=True):
    # type: (Text, bool) -> Text
    """
    Percent-encode a single query string key.

    Args:
        text: The key to encode.
        maximal: When true (the default), *text* is NFC-normalized,
            encoded as UTF-8 and every byte is mapped through
            ``_QUERY_KEY_QUOTE_MAP``. When false, only the characters in
            ``_QUERY_KEY_DELIMS`` are encoded.

    Returns:
        The encoded key.
    """
    if not maximal:
        return u"".join(
            _QUERY_KEY_QUOTE_MAP[char] if char in _QUERY_KEY_DELIMS else char
            for char in text
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join(_QUERY_KEY_QUOTE_MAP[byte] for byte in utf8_bytes)

357 

358 

def _encode_query_value(text, maximal=True):
    # type: (Text, bool) -> Text
    """
    Percent-encode a single query string value.

    Args:
        text: The value to encode.
        maximal: When true (the default), *text* is NFC-normalized,
            encoded as UTF-8 and every byte is mapped through
            ``_QUERY_VALUE_QUOTE_MAP``. When false, only the characters
            in ``_QUERY_VALUE_DELIMS`` are encoded.

    Returns:
        The encoded value.
    """
    if not maximal:
        return u"".join(
            _QUERY_VALUE_QUOTE_MAP[char]
            if char in _QUERY_VALUE_DELIMS
            else char
            for char in text
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join(_QUERY_VALUE_QUOTE_MAP[byte] for byte in utf8_bytes)

373 

374 

def _encode_fragment_part(text, maximal=True):
    # type: (Text, bool) -> Text
    """Quote the fragment part of the URL.

    Fragments don't have subdelimiters, so the whole URL fragment can be
    passed.

    Args:
        text: The fragment to encode.
        maximal: When true (the default), *text* is NFC-normalized,
            encoded as UTF-8 and every byte is mapped through
            ``_FRAGMENT_QUOTE_MAP``. When false, only the characters in
            ``_FRAGMENT_DELIMS`` are encoded.

    Returns:
        The encoded fragment.
    """
    if not maximal:
        return u"".join(
            _FRAGMENT_QUOTE_MAP[char] if char in _FRAGMENT_DELIMS else char
            for char in text
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join(_FRAGMENT_QUOTE_MAP[byte] for byte in utf8_bytes)

386 

387 

def _encode_userinfo_part(text, maximal=True):
    # type: (Text, bool) -> Text
    """Quote special characters in the username or the password section
    of the URL.

    Args:
        text: The username or the password (one at a time).
        maximal: When true (the default), *text* is NFC-normalized,
            encoded as UTF-8 and every byte is mapped through
            ``_USERINFO_PART_QUOTE_MAP``. When false, only the characters
            in ``_USERINFO_DELIMS`` are encoded.

    Returns:
        The encoded text.
    """
    if not maximal:
        return u"".join(
            _USERINFO_PART_QUOTE_MAP[char]
            if char in _USERINFO_DELIMS
            else char
            for char in text
        )
    utf8_bytes = normalize("NFC", text).encode("utf8")
    return u"".join(_USERINFO_PART_QUOTE_MAP[byte] for byte in utf8_bytes)

402 

403 

# This port list painstakingly curated by hand searching through
# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
# and
# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
#
# Keys are schemes that use a network location; the value is the scheme's
# default port, or None when it has none.
SCHEME_PORT_MAP = {
    "acap": 674,
    "afp": 548,
    "dict": 2628,
    "dns": 53,
    "file": None,
    "ftp": 21,
    "git": 9418,
    "gopher": 70,
    "http": 80,
    "https": 443,
    "imap": 143,
    "ipp": 631,
    "ipps": 631,
    "irc": 194,
    "ircs": 6697,
    "ldap": 389,
    "ldaps": 636,
    "mms": 1755,
    "msrp": 2855,
    "msrps": None,
    "mtqp": 1038,
    "nfs": 111,
    "nntp": 119,
    "nntps": 563,
    "pop": 110,
    "prospero": 1525,
    "redis": 6379,
    "rsync": 873,
    "rtsp": 554,
    "rtsps": 322,
    "rtspu": 5005,
    "sftp": 22,
    "smb": 445,
    "snmp": 161,
    "ssh": 22,
    "steam": None,
    "svn": 3690,
    "telnet": 23,
    "ventrilo": 3784,
    "vnc": 5900,
    "wais": 210,
    "ws": 80,
    "wss": 443,
    "xmpp": None,
}

# This list of schemes that don't use authorities is also from the link above.
NO_NETLOC_SCHEMES = {
    "urn",
    "about",
    "bitcoin",
    "blob",
    "data",
    "geo",
    "magnet",
    "mailto",
    "news",
    "pkcs11",
    "sip",
    "sips",
    "tel",
}
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc

# Schemes registered with query_plus_is_space=False; empty until
# register_scheme() adds to it.
NO_QUERY_PLUS_SCHEMES = set()

476 

477 

def register_scheme(
    text, uses_netloc=True, default_port=None, query_plus_is_space=True
):
    # type: (Text, bool, Optional[int], bool) -> None
    """Registers new scheme information, resulting in correct port and
    slash behavior from the URL object. There are dozens of standard
    schemes preregistered, so this function is mostly meant for
    proprietary internal customizations or stopgaps on missing
    standards information. If a scheme seems to be missing, please
    `file an issue`_!

    Registering a scheme again replaces its earlier netloc and port
    information, so that the most recent call is the one that
    :func:`scheme_uses_netloc` reflects.

    Args:
        text: A string representation of the scheme.
            (the 'http' in 'http://hatnote.com'). It is lower-cased
            before being stored.
        uses_netloc: Does the scheme support specifying a
            network host? For instance, "http" does, "mailto" does
            not. Defaults to True.
        default_port: The default port, if any, for
            netloc-using schemes.
        query_plus_is_space: If true, a "+" in the query string should be
            decoded as a space by DecodedURL.

    Raises:
        ValueError: If *default_port* cannot be converted to an integer,
            if a port is given for a non-netloc scheme, or if
            *uses_netloc* is not exactly ``True`` or ``False``.

    .. _file an issue: https://github.com/mahmoud/hyperlink/issues
    """
    text = text.lower()
    if default_port is not None:
        try:
            default_port = int(default_port)
        except (ValueError, TypeError):
            raise ValueError(
                "default_port expected integer or None, not %r"
                % (default_port,)
            )

    if uses_netloc is True:
        SCHEME_PORT_MAP[text] = default_port
        NO_NETLOC_SCHEMES.discard(text)
    elif uses_netloc is False:
        if default_port is not None:
            raise ValueError(
                "unexpected default port while specifying"
                " non-netloc scheme: %r" % (default_port,)
            )
        NO_NETLOC_SCHEMES.add(text)
        # scheme_uses_netloc() consults SCHEME_PORT_MAP first, so a stale
        # entry there would make this registration a silent no-op.
        SCHEME_PORT_MAP.pop(text, None)
    else:
        # Wrap the value in a tuple so that tuple arguments are reported
        # rather than breaking the string formatting.
        raise ValueError(
            "uses_netloc expected bool, not: %r" % (uses_netloc,)
        )

    # Add-only: the default True cannot be told apart from an explicit
    # True, so an earlier opt-out is not undone here.
    if not query_plus_is_space:
        NO_QUERY_PLUS_SCHEMES.add(text)

528 

529 

def scheme_uses_netloc(scheme, default=None):
    # type: (Text, Optional[bool]) -> Optional[bool]
    """Report whether *scheme* is followed by ``://`` rather than ``:``.

    Whether a URL uses :code:`:` or :code:`://` to separate the scheme
    from the rest of the URL depends on the scheme's own standard
    definition; it cannot be inferred from other parts of the URL. This
    function therefore checks the explicitly registered schemes, with
    common ones such as HTTP preregistered -- the same approach taken by
    :mod:`urlparse`.

    If the scheme as a whole is not registered, the part after its last
    ``+`` is looked up instead, which gives intuitive behavior for
    schemes like ``git+ssh``.

    Args:
        scheme: The scheme to look up; matching is case-insensitive.
        default: Returned when nothing is known about the scheme, so the
            caller can decide what to do.

    Returns:
        ``True`` or ``False`` for a known scheme, ``False`` for an empty
        one, and *default* otherwise.
    """
    if not scheme:
        return False
    lowered = scheme.lower()
    if lowered in SCHEME_PORT_MAP:
        return True
    if lowered in NO_NETLOC_SCHEMES:
        return False
    # e.g. "ssh" for "git+ssh"; the whole scheme again if it has no "+".
    base_scheme = lowered.rpartition("+")[2]
    return True if base_scheme in SCHEME_PORT_MAP else default

560 

561 

class URLParseError(ValueError):
    """Raised when a URL cannot be parsed.

    Inherits from :exc:`ValueError`. Mostly raised on invalid ports and
    IPv6 addresses.
    """

568 

569 

def _optional(argument, default):
    # type: (Any, Any) -> Any
    """Return *default* if *argument* is the ``_UNSET`` sentinel, else
    *argument* itself.

    The comparison is by identity, so ``None`` and other falsy values
    count as supplied.
    """
    return default if argument is _UNSET else argument

576 

577 

578def _typecheck(name, value, *types): 

579 # type: (Text, T, Type[Any]) -> T 

580 """ 

581 Check that the given *value* is one of the given *types*, or raise an 

582 exception describing the problem using *name*. 

583 """ 

584 if not types: 

585 raise ValueError("expected one or more types, maybe use _textcheck?") 

586 if not isinstance(value, types): 

587 raise TypeError( 

588 "expected %s for %s, got %r" 

589 % (" or ".join([t.__name__ for t in types]), name, value) 

590 ) 

591 return value 

592 

593 

594def _textcheck(name, value, delims=frozenset(), nullable=False): 

595 # type: (Text, T, Iterable[Text], bool) -> T 

596 if not isinstance(value, Text): 

597 if nullable and value is None: 

598 # used by query string values 

599 return value # type: ignore[unreachable] 

600 else: 

601 str_name = "unicode" if PY2 else "str" 

602 exp = str_name + " or NoneType" if nullable else str_name 

603 raise TypeError("expected %s for %s, got %r" % (exp, name, value)) 

604 if delims and set(value) & set(delims): # TODO: test caching into regexes 

605 raise ValueError( 

606 "one or more reserved delimiters %s present in %s: %r" 

607 % ("".join(delims), name, value) 

608 ) 

609 return value # type: ignore[return-value] # T vs. Text 

610 

611 

def iter_pairs(iterable):
    # type: (Iterable[Any]) -> Iterator[Any]
    """
    Return an iterator over the (key, value) pairs in ``iterable``.

    A mapping is iterated through its ``items()``; anything else is
    assumed to yield (key, value) pairs already. This mirrors what
    Python's ``dict()`` constructor accepts.
    """
    pairs = iterable.items() if isinstance(iterable, MappingABC) else iterable
    return iter(pairs)

624 

625 

def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False):
    # type: (Text, bool, bool) -> Text
    """Percent-decode only the escapes that stand for unreserved characters.

    Delegates to :func:`_percent_decode` with ``_UNRESERVED_DECODE_MAP``;
    the two flags are passed through unchanged.
    """
    decoded = _percent_decode(
        text,
        _decode_map=_UNRESERVED_DECODE_MAP,
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
    )
    return decoded

634 

635 

def _decode_userinfo_part(
    text, normalize_case=False, encode_stray_percents=False
):
    # type: (Text, bool, bool) -> Text
    """Percent-decode a username or password.

    Delegates to :func:`_percent_decode` with ``_USERINFO_DECODE_MAP``,
    which leaves the userinfo delimiters encoded; the two flags are passed
    through unchanged.
    """
    decoded = _percent_decode(
        text,
        _decode_map=_USERINFO_DECODE_MAP,
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
    )
    return decoded

646 

647 

def _decode_path_part(text, normalize_case=False, encode_stray_percents=False):
    # type: (Text, bool, bool) -> Text
    """Percent-decode a single path segment.

    Delegates to :func:`_percent_decode` with ``_PATH_DECODE_MAP``, which
    leaves the path delimiters (such as ``/``) encoded; the two flags are
    passed through unchanged.

    >>> _decode_path_part(u'%61%77%2f%7a')
    u'aw%2fz'
    >>> _decode_path_part(u'%61%77%2f%7a', normalize_case=True)
    u'aw%2Fz'
    """
    decoded = _percent_decode(
        text,
        _decode_map=_PATH_DECODE_MAP,
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
    )
    return decoded

662 

663 

def _decode_query_key(text, normalize_case=False, encode_stray_percents=False):
    # type: (Text, bool, bool) -> Text
    """Percent-decode a query string key.

    Delegates to :func:`_percent_decode` with ``_QUERY_KEY_DECODE_MAP``,
    which leaves the query-key delimiters encoded; the two flags are
    passed through unchanged.
    """
    decoded = _percent_decode(
        text,
        _decode_map=_QUERY_KEY_DECODE_MAP,
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
    )
    return decoded

672 

673 

def _decode_query_value(
    text, normalize_case=False, encode_stray_percents=False
):
    # type: (Text, bool, bool) -> Text
    """Percent-decode a query string value.

    Delegates to :func:`_percent_decode` with ``_QUERY_VALUE_DECODE_MAP``,
    which leaves the query-value delimiters and ``+`` encoded; the two
    flags are passed through unchanged.
    """
    decoded = _percent_decode(
        text,
        _decode_map=_QUERY_VALUE_DECODE_MAP,
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
    )
    return decoded

684 

685 

def _decode_fragment_part(
    text, normalize_case=False, encode_stray_percents=False
):
    # type: (Text, bool, bool) -> Text
    """Percent-decode a URL fragment.

    Delegates to :func:`_percent_decode` with ``_FRAGMENT_DECODE_MAP``,
    which leaves the fragment delimiters encoded; the two flags are passed
    through unchanged.
    """
    decoded = _percent_decode(
        text,
        _decode_map=_FRAGMENT_DECODE_MAP,
        normalize_case=normalize_case,
        encode_stray_percents=encode_stray_percents,
    )
    return decoded

696 

697 

def _percent_decode(
    text,  # type: Text
    normalize_case=False,  # type: bool
    subencoding="utf-8",  # type: Text
    raise_subencoding_exc=False,  # type: bool
    encode_stray_percents=False,  # type: bool
    _decode_map=_HEX_CHAR_MAP,  # type: Mapping[bytes, bytes]
):
    # type: (...) -> Text
    """Convert percent-encoded text characters to their normal,
    human-readable equivalents.

    All characters in the input text must be encodable by
    *subencoding*. All special characters underlying the values in the
    percent-encoding must be decodable as *subencoding*. If a
    non-*subencoding*-valid string is passed, the original text is
    returned with no changes applied.

    Only called by field-tailored variants, e.g.,
    :func:`_decode_path_part`, as every percent-encodable part of the
    URL has characters which should not be percent decoded.

    >>> _percent_decode(u'abc%20def')
    u'abc def'

    Args:
        text: Text with percent-encoding present.
        normalize_case: Whether undecoded percent segments, such as encoded
            delimiters, should be uppercased, per RFC 3986 Section 2.1.
            See :func:`_decode_path_part` for an example.
        subencoding: The name of the encoding underlying the percent-encoding.
        raise_subencoding_exc: Whether an error in decoding the bytes
            underlying the percent-decoding should be raised. This only
            affects the final decoding step; text that cannot be *encoded*
            with *subencoding* is always returned unchanged.
        encode_stray_percents: Whether a ``%`` that is not followed by two
            hex digits should be emitted as ``%25`` instead of being kept
            as a bare ``%``.
        _decode_map: Mapping from two-character hex pairs (bytes) to the
            byte each one decodes to. Pairs missing from the mapping are
            left percent-encoded.

    Returns:
        Text: The percent-decoded version of *text*, decoded by *subencoding*.
    """
    try:
        quoted_bytes = text.encode(subencoding)
    except UnicodeEncodeError:
        return text

    bits = quoted_bytes.split(b"%")
    if len(bits) == 1:
        # no percent signs at all, so there is nothing to decode
        return text

    res = [bits[0]]
    append = res.append

    # Every chunk after the first was preceded by a "%" in the input.
    for item in bits[1:]:
        hexpair, rest = item[:2], item[2:]
        try:
            append(_decode_map[hexpair])
            append(rest)
        except KeyError:
            # Either a valid hex pair that this map must not decode, or
            # not a hex pair at all (a stray percent sign).
            pair_is_hex = hexpair in _HEX_CHAR_MAP
            if pair_is_hex or not encode_stray_percents:
                append(b"%")
            else:
                # if it's undecodable, treat as a real percent sign,
                # which is reserved (because it wasn't in the
                # context-aware _decode_map passed in), and should
                # stay in an encoded state.
                append(b"%25")
            if normalize_case and pair_is_hex:
                append(hexpair.upper())
                append(rest)
            else:
                append(item)

    unquoted_bytes = b"".join(res)

    try:
        return unquoted_bytes.decode(subencoding)
    except UnicodeDecodeError:
        if raise_subencoding_exc:
            raise
        return text

776 

777 

778def _decode_host(host): 

779 # type: (Text) -> Text 

780 """Decode a host from ASCII-encodable text to IDNA-decoded text. If 

781 the host text is not ASCII, it is returned unchanged, as it is 

782 presumed that it is already IDNA-decoded. 

783 

784 Some technical details: _decode_host is built on top of the "idna" 

785 package, which has some quirks: 

786 

787 Capital letters are not valid IDNA2008. The idna package will 

788 raise an exception like this on capital letters: 

789 

790 > idna.core.InvalidCodepoint: Codepoint U+004B at position 1 ... not allowed 

791 

792 However, if a segment of a host (i.e., something in 

793 url.host.split('.')) is already ASCII, idna doesn't perform its 

794 usual checks. In fact, for capital letters it automatically 

795 lowercases them. 

796 

797 This check and some other functionality can be bypassed by passing 

798 uts46=True to idna.encode/decode. This allows a more permissive and 

799 convenient interface. So far it seems like the balanced approach. 

800 

801 Example output (from idna==2.6): 

802 

803 >> idna.encode(u'mahmöud.io') 

804 'xn--mahmud-zxa.io' 

805 >> idna.encode(u'Mahmöud.io') 

806 Traceback (most recent call last): 

807 File "<stdin>", line 1, in <module> 

808 File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 355, in encode 

809 result.append(alabel(label)) 

810 File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 276, in alabel 

811 check_label(label) 

812 File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 253, in check_label 

813 raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label))) 

814 idna.core.InvalidCodepoint: Codepoint U+004D at position 1 of u'Mahm\xf6ud' not allowed 

815 >> idna.encode(u'Mahmoud.io') 

816 'Mahmoud.io' 

817 

818 # Similar behavior for decodes below 

819 >> idna.decode(u'Mahmoud.io') 

820 u'mahmoud.io 

821 >> idna.decode(u'Méhmoud.io', uts46=True) 

822 u'm\xe9hmoud.io' 

823 """ # noqa: E501 

824 if not host: 

825 return u"" 

826 try: 

827 host_bytes = host.encode("ascii") 

828 except UnicodeEncodeError: 

829 host_text = host 

830 else: 

831 try: 

832 host_text = idna_decode(host_bytes, uts46=True) 

833 except ValueError: 

834 # only reached on "narrow" (UCS-2) Python builds <3.4, see #7 

835 # NOTE: not going to raise here, because there's no 

836 # ambiguity in the IDNA, and the host is still 

837 # technically usable 

838 host_text = host 

839 return host_text 

840 

841 

842def _resolve_dot_segments(path): 

843 # type: (Sequence[Text]) -> Sequence[Text] 

844 """Normalize the URL path by resolving segments of '.' and '..'. For 

845 more details, see `RFC 3986 section 5.2.4, Remove Dot Segments`_. 

846 

847 Args: 

848 path: sequence of path segments in text form 

849 

850 Returns: 

851 A new sequence of path segments with the '.' and '..' elements removed 

852 and resolved. 

853 

854 .. _RFC 3986 section 5.2.4, Remove Dot Segments: https://tools.ietf.org/html/rfc3986#section-5.2.4 

855 """ # noqa: E501 

856 segs = [] # type: List[Text] 

857 

858 for seg in path: 

859 if seg == u".": 

860 pass 

861 elif seg == u"..": 

862 if segs: 

863 segs.pop() 

864 else: 

865 segs.append(seg) 

866 

867 if list(path[-1:]) in ([u"."], [u".."]): 

868 segs.append(u"") 

869 

870 return segs 

871 

872 

def parse_host(host):
    # type: (Text) -> Tuple[Optional[AddressFamily], Text]
    """Parse the host into a tuple of ``(family, host)``, where family
    is the appropriate :mod:`socket` module constant when the host is
    an IP address. Family is ``None`` when the host is not an IP.

    A host containing ``:`` is required to be a valid IPv6 address; any
    other host is tried as an IPv4 address and otherwise treated as a
    name.

    Args:
        host: The host text, without surrounding brackets.

    Returns:
        family (socket constant or None), host (string)

    Raises:
        URLParseError: If *host* contains ``:`` but is not a valid IPv6
            address, including when it cannot be encoded for the address
            parser.

    >>> import socket
    >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
    True
    >>> parse_host('::1') == (socket.AF_INET6, '::1')
    True
    >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
    True
    """
    if not host:
        return None, u""

    if u":" in host:
        try:
            inet_pton(AF_INET6, host)
        except (socket.error, UnicodeEncodeError) as exc:
            # An unencodable host used to fall through with ``family``
            # never assigned, which surfaced as an UnboundLocalError at
            # the return below; such a host cannot be an IPv6 literal, so
            # it is reported the same way as any other invalid one.
            raise URLParseError("invalid IPv6 host: %r (%r)" % (host, exc))
        family = AF_INET6  # type: Optional[AddressFamily]
    else:
        try:
            inet_pton(AF_INET, host)
        except (socket.error, UnicodeEncodeError):
            family = None  # not an IP
        else:
            family = AF_INET

    return family, host

913 

914 

915class URL(object): 

916 r"""From blogs to billboards, URLs are so common, that it's easy to 

917 overlook their complexity and power. With hyperlink's 

918 :class:`URL` type, working with URLs doesn't have to be hard. 

919 

920 URLs are made of many parts. Most of these parts are officially 

921 named in `RFC 3986`_ and this diagram may prove handy in identifying 

922 them:: 

923 

924 foo://user:pass@example.com:8042/over/there?name=ferret#nose 

925 \_/ \_______/ \_________/ \__/\_________/ \_________/ \__/ 

926 | | | | | | | 

927 scheme userinfo host port path query fragment 

928 

929 While :meth:`~URL.from_text` is used for parsing whole URLs, the 

930 :class:`URL` constructor builds a URL from the individual 

931 components, like so:: 

932 

933 >>> from hyperlink import URL 

934 >>> url = URL(scheme=u'https', host=u'example.com', path=[u'hello', u'world']) 

935 >>> print(url.to_text()) 

936 https://example.com/hello/world 

937 

938 The constructor runs basic type checks. All strings are expected 

939 to be text (:class:`str` in Python 3, :class:`unicode` in Python 2). All 

940 arguments are optional, defaulting to appropriately empty values. A full 

941 list of constructor arguments is below. 

942 

943 Args: 

944 scheme: The text name of the scheme. 

945 host: The host portion of the network location 

946 port: The port part of the network location. If ``None`` or no port is 

947 passed, the port will default to the default port of the scheme, if 

948 it is known. See the ``SCHEME_PORT_MAP`` and 

949 :func:`register_default_port` for more info. 

950 path: A tuple of strings representing the slash-separated parts of the 

951 path, each percent-encoded. 

952 query: The query parameters, as a dictionary or as an sequence of 

953 percent-encoded key-value pairs. 

954 fragment: The fragment part of the URL. 

955 rooted: A rooted URL is one which indicates an absolute path. 

956 This is True on any URL that includes a host, or any relative URL 

957 that starts with a slash. 

958 userinfo: The username or colon-separated username:password pair. 

959 uses_netloc: Indicates whether ``://`` (the "netloc separator") will 

960 appear to separate the scheme from the *path* in cases where no 

961 host is present. 

962 Setting this to ``True`` is a non-spec-compliant affordance for the 

963 common practice of having URIs that are *not* URLs (cannot have a 

964 'host' part) but nevertheless use the common ``://`` idiom that 

965 most people associate with URLs; e.g. ``message:`` URIs like 

966 ``message://message-id`` being equivalent to ``message:message-id``. 

967 This may be inferred based on the scheme depending on whether 

968 :func:`register_scheme` has been used to register the scheme and 

969 should not be passed directly unless you know the scheme works like 

970 this and you know it has not been registered. 

971 

972 All of these parts are also exposed as read-only attributes of :class:`URL` 

973 instances, along with several useful methods. 

974 

975 .. _RFC 3986: https://tools.ietf.org/html/rfc3986 

976 .. _RFC 3987: https://tools.ietf.org/html/rfc3987 

977 """ # noqa: E501 

978 

def __init__(
    self,
    scheme=None,  # type: Optional[Text]
    host=None,  # type: Optional[Text]
    path=(),  # type: Iterable[Text]
    query=(),  # type: QueryParameters
    fragment=u"",  # type: Text
    port=None,  # type: Optional[int]
    rooted=None,  # type: Optional[bool]
    userinfo=u"",  # type: Text
    uses_netloc=None,  # type: Optional[bool]
):
    # type: (...) -> None
    """Build a URL from its individual components.

    Every argument is optional. Unset components are inferred where
    possible (scheme from the presence of a host, port from the scheme,
    rootedness from the host) and then type-checked before being stored.

    Raises:
        ValueError: If *scheme* is non-empty and does not match
            ``_SCHEME_RE``.
        TypeError: If *path* is a single text value instead of an
            iterable of text segments.
    """
    # Inference step: these checks must run while "not passed" (None) is
    # still distinguishable from "passed as empty".
    if scheme is None and host is not None:
        scheme = u"http"  # TODO: why
    if scheme is not None and port is None:
        port = SCHEME_PORT_MAP.get(scheme)
    if host and query and not path:
        # per RFC 3986 6.2.3, "a URI that uses the generic syntax
        # for authority with an empty path should be normalized to
        # a path of '/'."
        path = (u"",)

    # Detection is finished; collapse the remaining None values to their
    # empty defaults.
    scheme = u"" if scheme is None else scheme
    host = u"" if host is None else host
    if rooted is None:
        rooted = bool(host)

    # Validate and store each component.
    self._scheme = _textcheck("scheme", scheme)
    if self._scheme and not _SCHEME_RE.match(self._scheme):
        raise ValueError(
            'invalid scheme: %r. Only alphanumeric, "+",'
            ' "-", and "." allowed. Did you meant to call'
            " %s.from_text()?" % (self._scheme, self.__class__.__name__)
        )

    _, self._host = parse_host(_textcheck("host", host, "/?#@"))

    if isinstance(path, Text):
        raise TypeError(
            "expected iterable of text for path, not: %r" % (path,)
        )
    self._path = tuple(
        [_textcheck("path segment", segment, "/?#") for segment in path]
    )

    checked_pairs = []
    for key, val in iter_pairs(query):
        checked_pairs.append(
            (
                _textcheck("query parameter name", key, "&=#"),
                _textcheck("query parameter value", val, "&#", nullable=True),
            )
        )
    self._query = tuple(checked_pairs)

    self._fragment = _textcheck("fragment", fragment)
    self._port = _typecheck("port", port, int, NoneType)
    self._rooted = _typecheck("rooted", rooted, bool)
    self._userinfo = _textcheck("userinfo", userinfo, "/?#@")

    if uses_netloc is None:
        uses_netloc = scheme_uses_netloc(self._scheme, uses_netloc)
    self._uses_netloc = _typecheck(
        "uses_netloc", uses_netloc, bool, NoneType
    )

    has_authority = self._host or (
        self._port and self._port != SCHEME_PORT_MAP.get(scheme)
    )
    if has_authority:
        # Any authority in the textual URL means the path is rooted and
        # a "://" separator must be present.
        self._rooted = True
        self._uses_netloc = True
    else:
        if (not self._rooted) and self.path[:1] == (u"",):
            # A leading empty segment is how an unrooted path spells a
            # leading slash; fold it into the rooted flag.
            self._rooted = True
            self._path = self._path[1:]
        if self._path and not self._rooted:
            # A rootless path placed after "://" would (erroneously)
            # look like a hostname, so the netloc separator is dropped.
            self._uses_netloc = False

1063 

def get_decoded_url(self, lazy=False):
    # type: (bool) -> DecodedURL
    """Return the :class:`DecodedURL` wrapping this URL.

    The wrapper is created on first use and cached on the instance, so
    *lazy* only has an effect on the call that creates it.
    """
    try:
        cached = self._decoded_url
    except AttributeError:
        cached = DecodedURL(self, lazy=lazy)
        self._decoded_url = cached  # type: DecodedURL
    return cached

1071 

@property
def scheme(self):
    # type: () -> Text
    """The scheme: the text before the first colon of an absolute URL.

    It determines how the rest of the URL is interpreted. Common values
    are "http", "https", "ssh", "file" and "mailto". See
    :func:`~hyperlink.register_scheme()` for more info.
    """
    value = self._scheme
    return value

1082 

@property
def host(self):
    # type: () -> Text
    """The host: the second standard part of an absolute URL.

    A valid host, when present, is a domain name or an IP address (v4 or
    v6). In the text form it ends at the first slash, or at the second
    colon when a :attr:`~hyperlink.URL.port` is given.
    """
    value = self._host
    return value

1092 

@property
def port(self):
    # type: () -> Optional[int]
    """The integer port used when connecting to the :attr:`host`.

    If the original URL gave no port, this is the default port of the
    scheme. If the scheme has no known default either, this is ``None``.

        >>> URL.from_text(u'http://example.com/pa/th').port
        80
        >>> URL.from_text(u'foo://example.com/pa/th').port
        >>> URL.from_text(u'foo://example.com:8042/pa/th').port
        8042

    .. note::

       Per the standard, a port equal to the scheme's default port is
       omitted from the text URL.
    """
    value = self._port
    return value

1116 

@property
def path(self):
    # type: () -> Sequence[Text]
    """The hierarchical path as a tuple of text segments.

    Segments come from splitting the path on slashes. In the text form
    the path begins at the first slash after the host and ends at the
    "?" that starts the :attr:`~hyperlink.URL.query` string.
    """
    segments = self._path
    return segments

1126 

@property
def query(self):
    # type: () -> QueryPairs
    """The query string as a tuple of ``(key, value)`` pairs.

    Pairs come from splitting the query on ampersands. Keys are always
    text. A value is text when present and ``None`` when the key has no
    ``=`` part.

    To work with the pairs as a mapping, see
    :meth:`~hyperlink.URL.get()`, :meth:`~hyperlink.URL.add()`,
    :meth:`~hyperlink.URL.set()`, and
    :meth:`~hyperlink.URL.delete()`.
    """
    pairs = self._query
    return pairs

1142 

@property
def fragment(self):
    # type: () -> Text
    """The fragment: the final part of the URL, after the first "#".

    It follows the :attr:`~hyperlink.URL.path` or
    :attr:`~hyperlink.URL.query` and indirectly identifies a secondary
    resource, such as an anchor within an HTML page.
    """
    value = self._fragment
    return value

1152 

@property
def rooted(self):
    # type: () -> bool
    """``True`` when the path begins with a forward slash (``/``).

    The name follows the "path-rootless" rule of the BNF grammar,
    because "absolute path" and "absolute URI" are ambiguous terms.
    The implicit leading ``"/"`` is not stored in :attr:`path`, since
    that would be awkward to work with.
    """
    flag = self._rooted
    return flag

1165 

@property
def userinfo(self):
    # type: () -> Text
    """The userinfo text: a username, or ``username:password``."""
    value = self._userinfo
    return value

1173 

@property
def uses_netloc(self):
    # type: () -> Optional[bool]
    """
    Whether the "netloc separator" (``://``) is written between the
    scheme and the *path* when the URL has no host.
    """
    flag = self._uses_netloc
    return flag

1182 

@property
def user(self):
    # type: () -> Text
    """
    The username part of :attr:`~hyperlink.URL.userinfo`: everything
    before the first colon, or all of it when there is no colon.
    """
    username, _sep, _rest = self.userinfo.partition(u":")
    return username

1190 

def authority(self, with_password=False, **kw):
    # type: (bool, Any) -> Text
    """Build the ``userinfo@host:port`` portion of the URL.

        >>> url = URL.from_text(u'http://user:pass@localhost:8080/a/b?x=y')
        >>> url.authority()
        u'user:@localhost:8080'
        >>> url.authority(with_password=True)
        u'user:pass@localhost:8080'

    Args:
        with_password: Whether to keep the password (the userinfo text
            after the first colon) in the result. Defaults to False.
            The Twisted-style keyword ``includeSecrets`` is accepted as
            an alias and takes precedence when given.

    Returns:
        Text: The authority (network location and user information)
        portion of the URL.

    Raises:
        TypeError: If any keyword other than ``includeSecrets`` is
            passed.
    """
    # first, a bit of twisted compat
    with_password = kw.pop("includeSecrets", with_password)
    if kw:
        raise TypeError("got unexpected keyword arguments: %r" % kw.keys())

    host = self.host
    # A host containing a colon is wrapped in brackets so its colons
    # cannot be confused with the port separator.
    netloc_parts = ["[" + host + "]" if ":" in host else host]
    if self.port != SCHEME_PORT_MAP.get(self.scheme):
        # Only a port that differs from the scheme default is rendered.
        netloc_parts.append(Text(self.port))
    netloc = u":".join(netloc_parts)

    userinfo = self.userinfo
    if not userinfo:
        return netloc
    if not with_password and u":" in userinfo:
        # Keep the username and the colon, drop what follows.
        userinfo = userinfo[: userinfo.index(u":") + 1]
    return u"@".join([userinfo, netloc])

1229 

def __eq__(self, other):
    # type: (Any) -> bool
    """Compare two URLs component by component.

    Returns ``NotImplemented`` unless *other* is an instance of this
    URL's class. Paths are equal when they are identical or when both
    are members of ``_ROOT_PATHS``.
    """
    if not isinstance(other, self.__class__):
        return NotImplemented
    compared = (
        "scheme",
        "userinfo",
        "host",
        "query",
        "fragment",
        "port",
        "uses_netloc",
        "rooted",
    )
    if any(getattr(self, name) != getattr(other, name) for name in compared):
        return False
    if self.path == other.path:
        return True
    return self.path in _ROOT_PATHS and other.path in _ROOT_PATHS

1251 

def __ne__(self, other):
    # type: (Any) -> bool
    """Negate :meth:`__eq__` for instances of this URL's class.

    Returns ``NotImplemented`` for any other type.
    """
    if isinstance(other, self.__class__):
        return not self.__eq__(other)
    return NotImplemented

1257 

def __hash__(self):
    # type: () -> int
    """Hash the URL consistently with :meth:`__eq__`.

    ``__eq__`` treats every path in ``_ROOT_PATHS`` as equal, so those
    paths are all hashed as the empty path. Hashing the raw path would
    let two URLs compare equal while hashing differently, which breaks
    their use as dict keys and set members.
    """
    path = self.path
    if path in _ROOT_PATHS:
        # One representative for every root-equivalent path.
        path = ()
    return hash(
        (
            self.__class__,
            self.scheme,
            self.userinfo,
            self.host,
            path,
            self.query,
            self.fragment,
            self.port,
            self.rooted,
            self.uses_netloc,
        )
    )

1274 

@property
def absolute(self):
    # type: () -> bool
    """``True`` when the URL is "absolute".

    An absolute URL is complete enough to resolve to a network resource
    without a base URI. Here that means both a scheme and a host are set.

        >>> URL.from_text(u'http://wikipedia.org/').absolute
        True
        >>> URL.from_text(u'?a=b&c=d').absolute
        False
    """
    if not self.scheme:
        return False
    return bool(self.host)

1290 

def replace(
    self,
    scheme=_UNSET,  # type: Optional[Text]
    host=_UNSET,  # type: Optional[Text]
    path=_UNSET,  # type: Iterable[Text]
    query=_UNSET,  # type: QueryParameters
    fragment=_UNSET,  # type: Text
    port=_UNSET,  # type: Optional[int]
    rooted=_UNSET,  # type: Optional[bool]
    userinfo=_UNSET,  # type: Text
    uses_netloc=_UNSET,  # type: Optional[bool]
):
    # type: (...) -> URL
    """:class:`URL` objects are immutable, which means that attributes
    are designed to be set only once, at construction. Instead of
    modifying an existing URL, one simply creates a copy with the
    desired changes.

    If any of the following arguments is omitted, it defaults to
    the value on the current URL.

    Args:
        scheme: The text name of the scheme.
        host: The host portion of the network location.
        path: A tuple of strings representing the slash-separated parts of
            the path.
        query: The query parameters, as a dictionary or as a sequence of
            key-value pairs.
        fragment: The fragment part of the URL.
        port: The port part of the network location.
        rooted: Whether or not the path begins with a slash.
        userinfo: The username or colon-separated username:password pair.
        uses_netloc: Indicates whether ``://`` (the "netloc separator")
            will appear to separate the scheme from the *path* in cases
            where no host is present.
            Setting this to ``True`` is a non-spec-compliant affordance for
            the common practice of having URIs that are *not* URLs (cannot
            have a 'host' part) but nevertheless use the common ``://``
            idiom that most people associate with URLs; e.g. ``message:``
            URIs like ``message://message-id`` being equivalent to
            ``message:message-id``.
            This may be inferred based on the scheme depending on whether
            :func:`register_scheme` has been used to register the scheme
            and should not be passed directly unless you know the scheme
            works like this and you know it has not been registered.
            When omitted while *scheme* changes, the current preference
            is dropped so the new scheme decides; an explicitly passed
            value is always honored.

    Returns:
        URL: A copy of the current :class:`URL`, with new values for
        parameters passed.
    """
    scheme_changed = scheme is not _UNSET and scheme != self.scheme
    if scheme_changed and uses_netloc is _UNSET:
        # The stored uses_netloc belonged to the old scheme, so reset it
        # and let the constructor infer it for the new one. This is done
        # only when the caller did not pass uses_netloc, so an explicit
        # argument is never overridden.
        uses_netloc = None
    return self.__class__(
        scheme=_optional(scheme, self.scheme),
        host=_optional(host, self.host),
        path=_optional(path, self.path),
        query=_optional(query, self.query),
        fragment=_optional(fragment, self.fragment),
        port=_optional(port, self.port),
        rooted=_optional(rooted, self.rooted),
        userinfo=_optional(userinfo, self.userinfo),
        uses_netloc=_optional(uses_netloc, self.uses_netloc),
    )

1356 

@classmethod
def from_text(cls, text):
    # type: (Text) -> URL
    """Parse a whole URL from its string form.

    This is the counterpart of the :class:`URL` constructor, which
    builds a URL from parts, and of :func:`~URL.to_text()`::

        >>> URL.from_text(u'http://example.com')
        URL.from_text(u'http://example.com')
        >>> URL.from_text(u'?a=b&x=y')
        URL.from_text(u'?a=b&x=y')

    As shown above, it is also used as the :func:`repr` of
    :class:`URL` objects. Only *text* is accepted, so decode any
    bytestrings first.

    Args:
        text: A valid URL string.

    Returns:
        URL: The structured object version of the parsed string.

    .. note::

        Somewhat unexpectedly, URLs are a far more permissive
        format than most would assume. Many strings which don't
        look like URLs are still valid URLs. As a result, this
        method only raises :class:`URLParseError` on invalid port
        and IPv6 values in the host portion of the URL.
    """
    url_match = _URL_RE.match(_textcheck("text", text))
    if url_match is None:
        raise URLParseError("could not parse url: %r" % text)
    url_groups = url_match.groupdict()

    # Split the authority into userinfo, host and port.
    authority_text = url_groups["authority"] or u""
    authority_match = _AUTHORITY_RE.match(authority_text)
    if authority_match is None:
        raise URLParseError(
            "invalid authority %r in url: %r" % (authority_text, text)
        )
    authority_groups = authority_match.groupdict()
    bad_host = authority_groups["bad_host"]
    if bad_host:
        raise URLParseError(
            "invalid host %r in url: %r" % (bad_host, text)
        )

    userinfo = authority_groups["userinfo"] or u""
    host = authority_groups["ipv6_host"] or authority_groups["plain_host"]

    port = authority_groups["port"]
    if port is not None:
        try:
            port = int(port)  # type: ignore[assignment]  # FIXME, see below
        except ValueError:
            if not port:  # TODO: excessive?
                raise URLParseError(
                    "port must not be empty: %r" % authority_text
                )
            raise URLParseError("expected integer for port, not %r" % port)

    scheme = url_groups["scheme"] or u""
    fragment = url_groups["fragment"] or u""
    uses_netloc = bool(url_groups["_netloc_sep"])

    # An empty first segment means the path text began with a slash.
    path_text = url_groups["path"]
    if path_text:
        path = tuple(path_text.split(u"/"))
        rooted = not path[0]
        if rooted:
            path = path[1:]
    else:
        path = ()
        rooted = bool(authority_text)

    # A query item without "=" is a key with no value (None).
    query_text = url_groups["query"]
    if query_text:
        pairs = []
        for item in query_text.split(u"&"):
            if u"=" in item:
                pairs.append(item.split(u"=", 1))
            else:
                pairs.append((item, None))
        query = tuple(pairs)  # type: QueryPairs
    else:
        query = ()

    return cls(
        scheme,
        host,
        path,
        query,
        fragment,
        port,  # type: ignore[arg-type]  # FIXME, see above
        rooted,
        userinfo,
        uses_netloc,
    )

1453 

def normalize(
    self,
    scheme=True,
    host=True,
    path=True,
    query=True,
    fragment=True,
    userinfo=True,
    percents=True,
):
    # type: (bool, bool, bool, bool, bool, bool, bool) -> URL
    """Return a new URL object with several standard normalizations
    applied:

    * Decode unreserved characters (`RFC 3986 2.3`_)
    * Uppercase remaining percent-encoded octets (`RFC 3986 2.1`_)
    * Convert scheme and host casing to lowercase (`RFC 3986 3.2.2`_)
    * Resolve any "." and ".." references in the path (`RFC 3986 6.2.2.3`_)
    * Ensure an ending slash on URLs with an empty path (`RFC 3986 6.2.3`_)
    * Encode any stray percent signs (`%`) in percent-encoded
      fields (path, query, fragment, userinfo) (`RFC 3986 2.4`_)

    Each normalization is on by default and can be switched off for one
    part by passing `False` for the argument named after that part.

    Args:
        scheme: Convert the scheme to lowercase
        host: Convert the host to lowercase
        path: Normalize the path (see above for details)
        query: Normalize the query string
        fragment: Normalize the fragment
        userinfo: Normalize the userinfo
        percents: Encode isolated percent signs for any percent-encoded
            fields which are being normalized (defaults to `True`).

    >>> url = URL.from_text(u'Http://example.COM/a/../b/./c%2f?%61%')
    >>> print(url.normalize().to_text())
    http://example.com/b/c%2F?a%25

    .. _RFC 3986 3.2.2: https://tools.ietf.org/html/rfc3986#section-3.2.2
    .. _RFC 3986 2.3: https://tools.ietf.org/html/rfc3986#section-2.3
    .. _RFC 3986 2.1: https://tools.ietf.org/html/rfc3986#section-2.1
    .. _RFC 3986 6.2.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.2.3
    .. _RFC 3986 6.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.3
    .. _RFC 3986 2.4: https://tools.ietf.org/html/rfc3986#section-2.4
    """  # noqa: E501

    def _clean(target):
        # type: (Text) -> Text
        return _decode_unreserved(
            target, normalize_case=True, encode_stray_percents=percents
        )

    changes = {}  # type: Dict[str, Any]
    if scheme:
        changes["scheme"] = self.scheme.lower()
    if host:
        changes["host"] = self.host.lower()
    if path:
        current_path = self.path
        if current_path:
            resolved = _resolve_dot_segments(current_path)
            changes["path"] = [_clean(segment) for segment in resolved]
        else:
            # An empty path is normalized to a single empty segment,
            # which gives the "ending slash" described above.
            changes["path"] = (u"",)
    if query:
        cleaned_pairs = []
        for key, val in self.query:
            # Falsy values (None or empty text) are kept as they are.
            cleaned_pairs.append((_clean(key), _clean(val) if val else val))
        changes["query"] = cleaned_pairs
    if fragment:
        changes["fragment"] = _clean(self.fragment)
    if userinfo:
        user_and_password = self.userinfo.split(":", 1)
        changes["userinfo"] = u":".join(
            [_clean(part) for part in user_and_password]
        )

    return self.replace(**changes)

1533 

def child(self, *segments):
    # type: (Text) -> URL
    """Return a :class:`URL` with *segments* appended to the path.

    The rest of the URL, including the query string and fragment, is
    carried over.

    For example::

        >>> url = URL.from_text(u'http://localhost/a/b?x=y')
        >>> child_url = url.child(u"c", u"d")
        >>> child_url.to_text()
        u'http://localhost/a/b/c/d?x=y'

    Args:
        segments: Additional parts to be joined and added to the path, like
            :func:`os.path.join`. Special characters in segments will be
            percent encoded.

    Returns:
        URL: A copy of the current URL with the extra path segments, or
        this same URL when no segments are given.
    """
    if not segments:
        return self

    checked = [_textcheck("path segment", s) for s in segments]
    base = tuple(self.path)
    if base and base[-1] == u"":
        # Drop the trailing empty segment (a trailing slash) so the new
        # segments are not preceded by an empty one.
        base = base[:-1]
    encoded = tuple(_encode_path_parts(checked, maximal=False))
    return self.replace(path=base + encoded)

1566 

def sibling(self, segment):
    # type: (Text) -> URL
    """Return a :class:`URL` whose last path segment is *segment*.

    Args:
        segment: A single path segment.

    Returns:
        URL: A copy of the current URL with the last path segment
        replaced by *segment*. Special characters such as
        ``/?#`` will be percent encoded.
    """
    _textcheck("path segment", segment)
    parent_segments = tuple(self.path)[:-1]
    encoded_segment = _encode_path_part(segment)
    return self.replace(path=parent_segments + (encoded_segment,))

1583 

def click(self, href=u""):
    # type: (Union[Text, URL]) -> URL
    """Resolve the given URL relative to this URL.

    The result should match what a web browser would produce if you
    visited the current URL and clicked on *href*.

        >>> url = URL.from_text(u'http://blog.hatnote.com/')
        >>> url.click(u'/post/155074058790').to_text()
        u'http://blog.hatnote.com/post/155074058790'
        >>> url = URL.from_text(u'http://localhost/a/b/c/')
        >>> url.click(u'../d/./e').to_text()
        u'http://localhost/a/b/d/e'

    Args (Text):
        href: A string representing a clicked URL.

    Return:
        A copy of the current URL with navigation logic applied.

    Raises:
        NotImplementedError: If *href* has a scheme but a rootless path.

    For more information, see `RFC 3986 section 5`_.

    .. _RFC 3986 section 5: https://tools.ietf.org/html/rfc3986#section-5
    """
    if not href:
        # An empty href resolves against this URL itself.
        clicked = self
    else:
        if isinstance(href, URL):
            clicked = href
        else:
            # TODO: This error message is not completely accurate,
            # as URL objects are now also valid, but Twisted's
            # test suite (wrongly) relies on this exact message.
            _textcheck("relative URL", href)
            clicked = URL.from_text(href)
        if clicked.absolute:
            return clicked

    if clicked.scheme and not clicked.rooted:
        # Schemes with relative paths are not well-defined.  RFC 3986 calls
        # them a "loophole in prior specifications" that should be avoided,
        # or supported only for backwards compatibility.
        raise NotImplementedError(
            "absolute URI with rootless path: %r" % (href,)
        )

    query = clicked.query
    if clicked.rooted:
        path = clicked.path
    elif clicked.path:
        # A relative path replaces the last segment of the base path.
        path = tuple(self.path)[:-1] + tuple(clicked.path)
    else:
        # No path given: keep the base path, and keep the base query too
        # unless the href supplied its own.
        path = self.path
        if not query:
            query = self.query

    return self.replace(
        scheme=clicked.scheme or self.scheme,
        host=clicked.host or self.host,
        port=clicked.port or self.port,
        path=_resolve_dot_segments(path),
        query=query,
        fragment=clicked.fragment,
    )

1647 

def to_uri(self):
    # type: () -> URL
    u"""Return a :class:`URL` with every non-ASCII character
    percent-encoded (or IDNA-encoded, for the host). Do this before
    sending a :class:`URL` over a network protocol.

    For example::

        >>> URL.from_text(u'https://ايران.com/foo⇧bar/').to_uri()
        URL.from_text(u'https://xn--mgba3a4fra.com/foo%E2%87%A7bar/')

    Returns:
        URL: A new instance with its path segments, query parameters, and
        hostname encoded, so that they are all in the standard
        US-ASCII range.
    """
    # Username and password are encoded separately so that the colon
    # between them is kept.
    user_and_password = self.userinfo.split(":", 1)
    encoded_userinfo = u":".join(
        [_encode_userinfo_part(part) for part in user_and_password]
    )
    encoded_path = _encode_path_parts(
        self.path, has_scheme=bool(self.scheme), rooted=False, maximal=True
    )
    current_host = self.host
    if current_host:
        encoded_host = idna_encode(current_host, uts46=True).decode("ascii")
    else:
        # An empty host is passed through without IDNA encoding.
        encoded_host = current_host

    encoded_query = []
    for key, val in self.query:
        encoded_key = _encode_query_key(key, maximal=True)
        if val is not None:
            encoded_val = _encode_query_value(val, maximal=True)
        else:
            encoded_val = None
        encoded_query.append((encoded_key, encoded_val))

    return self.replace(
        userinfo=encoded_userinfo,
        host=encoded_host,
        path=encoded_path,
        query=tuple(encoded_query),
        fragment=_encode_fragment_part(self.fragment, maximal=True),
    )

1692 

def to_iri(self):
    # type: () -> URL
    u"""Return a :class:`URL` with all but a few reserved characters
    decoded into human-readable form.

    Percent-encoded Unicode and IDNA-encoded hostnames are
    decoded, like so::

        >>> url = URL.from_text(u'https://xn--mgba3a4fra.example.com/foo%E2%87%A7bar/')
        >>> print(url.to_iri().to_text())
        https://ايران.example.com/foo⇧bar/

    .. note::

        As a general Python issue, "narrow" (UCS-2) builds of
        Python may not be able to fully decode certain URLs. In
        those cases this method returns a best-effort,
        partially-decoded URL which is still valid. This issue
        does not affect any Python builds 3.4+.

    Returns:
        URL: A new instance with its path segments, query parameters, and
        hostname decoded for display purposes.
    """  # noqa: E501
    # Username and password are decoded separately so that the colon
    # between them is kept.
    user_and_password = self.userinfo.split(":", 1)
    decoded_userinfo = u":".join(
        [_decode_userinfo_part(part) for part in user_and_password]
    )
    decoded_host = _decode_host(self.host)

    decoded_path = [_decode_path_part(segment) for segment in self.path]

    decoded_query = []
    for key, val in self.query:
        decoded_key = _decode_query_key(key)
        decoded_val = None if val is None else _decode_query_value(val)
        decoded_query.append((decoded_key, decoded_val))

    return self.replace(
        userinfo=decoded_userinfo,
        host=decoded_host,
        path=decoded_path,
        query=tuple(decoded_query),
        fragment=_decode_fragment_part(self.fragment),
    )

1735 

def to_text(self, with_password=False):
    # type: (bool) -> Text
    """Render this URL to its textual representation.

    The password, if one is set, is left out by default. RFC 3986
    treats carrying such sensitive information in URLs as deprecated.
    Quoting from RFC 3986, `section 3.2.1`:

        "Applications should not render as clear text any data after the
        first colon (":") character found within a userinfo subcomponent
        unless the data after the colon is the empty string (indicating no
        password)."

    Args (bool):
        with_password: Whether or not to include the password in the URL
            text. Defaults to False.

    Returns:
        Text: The serialized textual representation of this URL, such as
        ``u"http://example.com/some/path?some=query"``.

    The natural counterpart to :class:`URL.from_text()`.

    .. _section 3.2.1: https://tools.ietf.org/html/rfc3986#section-3.2.1
    """
    scheme = self.scheme
    authority = self.authority(with_password)
    path = "/".join(
        _encode_path_parts(
            self.path,
            rooted=self.rooted,
            has_scheme=bool(scheme),
            has_authority=bool(authority),
            maximal=False,
        )
    )

    # A key whose value is None is rendered bare, without "=".
    rendered_pairs = []
    for key, val in self.query:
        rendered = _encode_query_key(key, maximal=False)
        if val is not None:
            rendered = u"=".join(
                (rendered, _encode_query_value(val, maximal=False))
            )
        rendered_pairs.append(rendered)
    query_string = u"&".join(rendered_pairs)

    fragment = self.fragment

    pieces = []  # type: List[Text]
    if scheme:
        pieces.extend((scheme, ":"))
    if authority:
        pieces.extend(("//", authority))
    elif scheme and path[:2] != "//" and self.uses_netloc:
        # No authority, but "://" is wanted and the path does not already
        # begin with "//".
        pieces.append("//")
    if path:
        if scheme and authority and path[:1] != "/":
            pieces.append("/")  # relpaths with abs authorities auto get '/'
        pieces.append(path)
    if query_string:
        pieces.extend(("?", query_string))
    if fragment:
        pieces.extend(("#", fragment))
    return u"".join(pieces)

1811 

def __repr__(self):
    # type: () -> str
    """Return a representation that shows every part of the URL and
    is also a valid argument to :func:`eval`.
    """
    class_name = self.__class__.__name__
    text = self.to_text()
    return "%s.from_text(%r)" % (class_name, text)

1819 

def _to_bytes(self):
    # type: () -> bytes
    """
    Render the URI form of this URL as ASCII bytes.

    This lets URL objects be used directly with libraries like
    requests, which automatically stringify URL parameters. See
    issue #49.
    """
    uri_text = self.to_uri().to_text()
    return uri_text.encode("ascii")

1828 

# String-conversion hooks. On Python 2, ``str()`` gives the ASCII URI bytes
# and ``unicode()`` gives the text form. On Python 3, ``bytes()`` gives the
# ASCII URI bytes and ``str()`` gives the text form.
if PY2:
    __str__ = _to_bytes
    __unicode__ = to_text
else:
    __bytes__ = _to_bytes
    __str__ = to_text

# # Begin Twisted Compat Code
# camelCase aliases for the snake_case conversion methods.
asURI = to_uri
asIRI = to_iri

1839 

@classmethod
def fromText(cls, s):
    # type: (Text) -> URL
    """Twisted-compatible alias that delegates to :meth:`from_text`."""
    parsed = cls.from_text(s)
    return parsed

1844 

def asText(self, includeSecrets=False):
    # type: (bool) -> Text
    """Twisted-compatible alias for :meth:`to_text`.

    *includeSecrets* is passed on as ``with_password``.
    """
    text = self.to_text(with_password=includeSecrets)
    return text

1848 

def __dir__(self):
    # type: () -> Sequence[Text]
    """Return the sorted attribute names with the Twisted
    compatibility aliases left out.
    """
    try:
        names = object.__dir__(self)
    except AttributeError:
        # object.__dir__ == AttributeError  # pdw for py2
        names = dir(self.__class__) + list(self.__dict__.keys())
    hidden = set(["fromText", "asURI", "asIRI", "asText"])
    visible = set(names).difference(hidden)
    return sorted(visible)

1858 

1859 # # End Twisted Compat Code 

1860 

def add(self, name, value=None):
    # type: (Text, Optional[Text]) -> URL
    """Return a :class:`URL` with the query argument *name* appended,
    carrying the value *value*, like so::

        >>> URL.from_text(u'https://example.com/?x=y').add(u'x')
        URL.from_text(u'https://example.com/?x=y&x')
        >>> URL.from_text(u'https://example.com/?x=y').add(u'x', u'z')
        URL.from_text(u'https://example.com/?x=y&x=z')

    Args:
        name: The name of the query parameter to add.
            The part before the ``=``.
        value: The value of the query parameter to add.
            The part after the ``=``.
            Defaults to ``None``, meaning no value.

    Returns:
        URL: A new :class:`URL` instance with the parameter added.
    """
    new_pair = (name, value)
    extended = self.query + (new_pair,)
    return self.replace(query=extended)

1882 

def set(self, name, value=None):
    # type: (Text, Optional[Text]) -> URL
    """Make a new :class:`URL` instance with the query parameter *name*
    set to *value*. All existing occurrences, if any, are replaced
    by the single name-value pair.

        >>> URL.from_text(u'https://example.com/?x=y').set(u'x')
        URL.from_text(u'https://example.com/?x')
        >>> URL.from_text(u'https://example.com/?x=y').set(u'x', u'z')
        URL.from_text(u'https://example.com/?x=z')

    The new pair takes the position of the first existing occurrence of
    *name*. When *name* is not present yet, the pair is appended after
    all existing parameters.

    Args:
        name: The name of the query parameter to set.
            The part before the ``=``.
        value: The value of the query parameter to set.
            The part after the ``=``.
            Defaults to ``None``, meaning no value.

    Returns:
        URL: A new :class:`URL` instance with the parameter set.
    """
    updated = []
    placed = False
    for key, val in self.query:
        if key != name:
            updated.append((key, val))
        elif not placed:
            # The first occurrence marks where the new pair goes; later
            # occurrences are dropped.
            updated.append((name, value))
            placed = True
    if not placed:
        # A new name is appended. Inserting at a "not found" index of -1
        # would put it before the last existing parameter.
        updated.append((name, value))
    return self.replace(query=updated)

1911 

def get(self, name):
    # type: (Text) -> List[Optional[Text]]
    """Collect the values of every query parameter called *name*::

        >>> url = URL.from_text(u'?x=1&x=2')
        >>> url.get('x')
        [u'1', u'2']
        >>> url.get('y')
        []

    Values are returned in query order. A parameter without a value
    contributes ``None``. When *name* does not occur, the result is an
    empty list; a list is always returned.

    Args:
        name: The name of the query parameter to get.

    Returns:
        List[Optional[Text]]: A list of all the values associated with the
        key, in string form.
    """
    matches = []
    for key, value in self.query:
        if name == key:
            matches.append(value)
    return matches

1933 

def remove(
    self,
    name,  # type: Text
    value=_UNSET,  # type: Text
    limit=None,  # type: Optional[int]
):
    # type: (...) -> URL
    """Return a new :class:`URL` without the query parameter *name*.

    Every parameter called *name* is dropped, or, when *value* is given,
    only those whose value also equals *value*. Removing a parameter
    that is not present is not an error.

    Args:
        name: The name of the query parameter to remove.
        value: Optional value to additionally filter on.
            Setting this removes query parameters which match both name
            and value.
        limit: Optional maximum number of parameters to remove.

    Returns:
        URL: A new :class:`URL` instance with the parameter removed.
    """
    any_value = value is _UNSET
    remaining = []
    dropped = 0

    for key, val in self.query:
        targeted = key == name and (any_value or val == value)
        # ``limit=None`` means "no cap"; otherwise stop dropping once the
        # cap is reached and keep any later matches.
        if targeted and (limit is None or dropped < limit):
            dropped += 1
            continue
        remaining.append((key, val))

    return self.replace(query=remaining)

1979 

1980 

EncodedURL = URL  # An alias better describing what the URL really is

# A single URL built with no arguments; it is the default value of the
# ``url`` parameter of ``DecodedURL.__init__``.
_EMPTY_URL = URL()

1984 

1985 

def _replace_plus(text):
    # type: (Text) -> Text
    """Return *text* with every ``+`` rewritten as ``%20``."""
    pieces = text.split("+")
    return "%20".join(pieces)

1989 

1990 

def _no_op(text):
    # type: (Text) -> Text
    """Identity transform: hand *text* back unchanged."""
    unchanged = text
    return unchanged

1994 

1995 

class DecodedURL(object):
    """
    :class:`DecodedURL` is a type designed to act as a higher-level
    interface to :class:`URL` and the recommended type for most
    operations. By analogy, :class:`DecodedURL` is the
    :class:`unicode` to URL's :class:`bytes`.

    :class:`DecodedURL` automatically handles encoding and decoding
    all its components, such that all inputs and outputs are in a
    maximally-decoded state.  Note that this means, for some special
    cases, a URL may not "roundtrip" character-for-character, but this
    is considered a good tradeoff for the safety of automatic
    encoding.

    Otherwise, :class:`DecodedURL` has almost exactly the same API as
    :class:`URL`.

    Where applicable, a UTF-8 encoding is presumed. Be advised that
    some interactions can raise :exc:`UnicodeEncodeErrors` and
    :exc:`UnicodeDecodeErrors`, just like when working with
    bytestrings. Examples of such interactions include handling query
    strings encoding binary data, and paths containing segments with
    special characters encoded with codecs other than UTF-8.

    Args:
        url: A :class:`URL` object to wrap.
        lazy: Set to True to avoid pre-decoding all parts of the URL to
            check for validity.
            Defaults to False.
        query_plus_is_space: + characters in the query string should be treated
            as spaces when decoding.  If unspecified, the default is taken from
            the scheme.

    .. note::

        The :class:`DecodedURL` initializer takes a :class:`URL` object,
        not URL components, like :class:`URL`. To programmatically
        construct a :class:`DecodedURL`, you can use this pattern:

        >>> print(DecodedURL().replace(scheme=u'https',
        ... host=u'pypi.org', path=(u'projects', u'hyperlink')).to_text())
        https://pypi.org/projects/hyperlink

    .. versionadded:: 18.0.0
    """

    def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
        # type: (URL, bool, Optional[bool]) -> None
        self._url = url
        if query_plus_is_space is None:
            # Schemes listed in NO_QUERY_PLUS_SCHEMES keep "+" literal.
            query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
        self._query_plus_is_space = query_plus_is_space
        if not lazy:
            # cache the following, while triggering any decoding
            # issues with decodable fields
            self.host, self.userinfo, self.path, self.query, self.fragment
        return

    @classmethod
    def from_text(cls, text, lazy=False, query_plus_is_space=None):
        # type: (Text, bool, Optional[bool]) -> DecodedURL
        """\
        Make a `DecodedURL` instance from any text string containing a URL.

        Args:
            text: Text containing the URL
            lazy: Set to True to skip pre-decoding all parts of the URL
                to check for validity.
                Defaults to False.
            query_plus_is_space: Whether + characters in the query string
                are treated as spaces when decoding.  If unspecified, the
                default is taken from the scheme.
        """
        _url = URL.from_text(text)
        return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)

    @property
    def encoded_url(self):
        # type: () -> URL
        """Access the underlying :class:`URL` object, which has any special
        characters encoded.
        """
        return self._url

    def to_text(self, with_password=False):
        # type: (bool) -> Text
        "Passthrough to :meth:`~hyperlink.URL.to_text()`"
        return self._url.to_text(with_password)

    def to_uri(self):
        # type: () -> URL
        "Passthrough to :meth:`~hyperlink.URL.to_uri()`"
        return self._url.to_uri()

    def to_iri(self):
        # type: () -> URL
        "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
        return self._url.to_iri()

    def _clone(self, url):
        # type: (URL) -> DecodedURL
        # Wrap *url* in a new instance of the same class, carrying over
        # this instance's "+ means space" setting.
        return self.__class__(
            url,
            # TODO: propagate laziness?
            query_plus_is_space=self._query_plus_is_space,
        )

    def click(self, href=u""):
        # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
        """Return a new DecodedURL wrapping the result of
        :meth:`~hyperlink.URL.click()`
        """
        if isinstance(href, DecodedURL):
            # Unwrap so the underlying URL.click() receives a URL.
            href = href._url
        return self._clone(
            self._url.click(href=href),
        )

    def sibling(self, segment):
        # type: (Text) -> DecodedURL
        """Automatically encode any reserved characters in *segment* and
        return a new `DecodedURL` wrapping the result of
        :meth:`~hyperlink.URL.sibling()`
        """
        return self._clone(
            self._url.sibling(_encode_reserved(segment)),
        )

    def child(self, *segments):
        # type: (Text) -> DecodedURL
        """Automatically encode any reserved characters in *segments* and
        return a new `DecodedURL` wrapping the result of
        :meth:`~hyperlink.URL.child()`.

        With no *segments*, this same instance is returned.
        """
        if not segments:
            return self
        new_segs = [_encode_reserved(s) for s in segments]
        return self._clone(self._url.child(*new_segs))

    def normalize(
        self,
        scheme=True,
        host=True,
        path=True,
        query=True,
        fragment=True,
        userinfo=True,
        percents=True,
    ):
        # type: (bool, bool, bool, bool, bool, bool, bool) -> DecodedURL
        """Return a new `DecodedURL` wrapping the result of
        :meth:`~hyperlink.URL.normalize()`
        """
        return self._clone(
            self._url.normalize(
                scheme, host, path, query, fragment, userinfo, percents
            )
        )

    @property
    def absolute(self):
        # type: () -> bool
        return self._url.absolute

    @property
    def scheme(self):
        # type: () -> Text
        return self._url.scheme

    @property
    def host(self):
        # type: () -> Text
        return _decode_host(self._url.host)

    @property
    def port(self):
        # type: () -> Optional[int]
        return self._url.port

    @property
    def rooted(self):
        # type: () -> bool
        return self._url.rooted

    @property
    def path(self):
        # type: () -> Sequence[Text]
        # Decoded once, then cached on the instance.
        if not hasattr(self, "_path"):
            self._path = tuple(
                [
                    _percent_decode(p, raise_subencoding_exc=True)
                    for p in self._url.path
                ]
            )
        return self._path

    @property
    def query(self):
        # type: () -> QueryPairs
        # Decoded once, then cached on the instance.
        if not hasattr(self, "_query"):
            if self._query_plus_is_space:
                predecode = _replace_plus
            else:
                predecode = _no_op

            self._query = cast(
                QueryPairs,
                tuple(
                    tuple(
                        _percent_decode(
                            predecode(x), raise_subencoding_exc=True
                        )
                        if x is not None
                        else None
                        for x in (k, v)
                    )
                    for k, v in self._url.query
                ),
            )
        return self._query

    @property
    def fragment(self):
        # type: () -> Text
        # Decoded once, then cached on the instance.
        if not hasattr(self, "_fragment"):
            frag = self._url.fragment
            self._fragment = _percent_decode(frag, raise_subencoding_exc=True)
        return self._fragment

    @property
    def userinfo(self):
        # type: () -> Union[Tuple[str], Tuple[str, str]]
        # Decoded once, then cached on the instance.  The encoded userinfo
        # is split on the first ":" only, giving one or two parts.
        if not hasattr(self, "_userinfo"):
            self._userinfo = cast(
                Union[Tuple[str], Tuple[str, str]],
                tuple(
                    _percent_decode(p, raise_subencoding_exc=True)
                    for p in self._url.userinfo.split(":", 1)
                ),
            )
        return self._userinfo

    @property
    def user(self):
        # type: () -> Text
        return self.userinfo[0]

    @property
    def uses_netloc(self):
        # type: () -> Optional[bool]
        return self._url.uses_netloc

    def replace(
        self,
        scheme=_UNSET,  # type: Optional[Text]
        host=_UNSET,  # type: Optional[Text]
        path=_UNSET,  # type: Iterable[Text]
        query=_UNSET,  # type: QueryParameters
        fragment=_UNSET,  # type: Text
        port=_UNSET,  # type: Optional[int]
        rooted=_UNSET,  # type: Optional[bool]
        userinfo=_UNSET,  # type: Union[Tuple[str], Tuple[str, str]]
        uses_netloc=_UNSET,  # type: Optional[bool]
    ):
        # type: (...) -> DecodedURL
        """While the signature is the same, this `replace()` differs a little
        from URL.replace. For instance, it accepts userinfo as a
        tuple, not as a string, handling the case of having a username
        containing a `:`. As with the rest of the methods on
        DecodedURL, if you pass a reserved character, it will be
        automatically encoded instead of an error being raised.

        Raises:
            ValueError: If *userinfo* has more than two items.
        """
        if path is not _UNSET:
            path = tuple(_encode_reserved(p) for p in path)
        if query is not _UNSET:
            query = cast(
                QueryPairs,
                tuple(
                    tuple(
                        _encode_reserved(x) if x is not None else None
                        for x in (k, v)
                    )
                    for k, v in iter_pairs(query)
                ),
            )
        if userinfo is not _UNSET:
            if len(userinfo) > 2:
                raise ValueError(
                    'userinfo expected sequence of ["user"] or'
                    ' ["user", "password"], got %r' % (userinfo,)
                )
            userinfo_text = u":".join([_encode_reserved(p) for p in userinfo])
        else:
            userinfo_text = _UNSET
        new_url = self._url.replace(
            scheme=scheme,
            host=host,
            path=path,
            query=query,
            fragment=fragment,
            port=port,
            rooted=rooted,
            userinfo=userinfo_text,
            uses_netloc=uses_netloc,
        )
        return self._clone(url=new_url)

    def get(self, name):
        # type: (Text) -> List[Optional[Text]]
        "Get the value of all query parameters whose name matches *name*"
        return [v for (k, v) in self.query if name == k]

    def add(self, name, value=None):
        # type: (Text, Optional[Text]) -> DecodedURL
        """Return a new DecodedURL with the query parameter *name* and *value*
        added."""
        return self.replace(query=self.query + ((name, value),))

    def set(self, name, value=None):
        # type: (Text, Optional[Text]) -> DecodedURL
        """Return a new DecodedURL with query parameter *name* set to *value*.

        All existing occurrences of *name* are replaced by the single pair,
        which takes the position of the first occurrence.  If *name* is not
        present yet, the pair is appended after the existing parameters.
        """
        query = self.query
        q = [(k, v) for (k, v) in query if k != name]
        # No matching pair precedes the first occurrence, so its index is
        # the same in the filtered list.  Default to ``len(q)`` (append) when
        # the key is absent; a ``-1`` default used as a slice index would
        # insert before the last remaining parameter.
        idx = next((i for (i, (k, _)) in enumerate(query) if k == name), len(q))
        q.insert(idx, (name, value))
        return self.replace(query=q)

    def remove(
        self,
        name,  # type: Text
        value=_UNSET,  # type: Text
        limit=None,  # type: Optional[int]
    ):
        # type: (...) -> DecodedURL
        """Return a new DecodedURL with query parameter *name* removed.

        Optionally also filter for *value*, as well as cap the number
        of parameters removed with *limit*.
        """
        if limit is None:
            if value is _UNSET:
                nq = [(k, v) for (k, v) in self.query if k != name]
            else:
                nq = [
                    (k, v)
                    for (k, v) in self.query
                    if not (k == name and v == value)
                ]
        else:
            nq, removed_count = [], 0
            for k, v in self.query:
                if (
                    k == name
                    and (value is _UNSET or v == value)
                    and removed_count < limit
                ):
                    removed_count += 1  # drop it
                else:
                    nq.append((k, v))  # keep it

        return self.replace(query=nq)

    def __repr__(self):
        # type: () -> str
        cn = self.__class__.__name__
        return "%s(url=%r)" % (cn, self._url)

    def __str__(self):
        # type: () -> str
        # TODO: the underlying URL's __str__ needs to change to make
        # this work as the URL, see #55
        return str(self._url)

    def __eq__(self, other):
        # type: (Any) -> bool
        # Equality is decided on the normalized, fully-encoded form.
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.normalize().to_uri() == other.normalize().to_uri()

    def __ne__(self, other):
        # type: (Any) -> bool
        if not isinstance(other, self.__class__):
            return NotImplemented
        return not self.__eq__(other)

    def __hash__(self):
        # type: () -> int
        # Hash the same normalized, fully-encoded URL that __eq__ compares,
        # so that two instances which compare equal also hash equal.
        # NOTE(review): relies on URL equality implying equality of the
        # attributes hashed below - confirm against URL.__eq__.
        uri = self.normalize().to_uri()
        return hash(
            (
                self.__class__,
                uri.scheme,
                uri.userinfo,
                uri.host,
                uri.path,
                uri.query,
                uri.fragment,
                uri.port,
                uri.rooted,
                uri.uses_netloc,
            )
        )

    # # Begin Twisted Compat Code
    asURI = to_uri
    asIRI = to_iri

    @classmethod
    def fromText(cls, s, lazy=False):
        # type: (Text, bool) -> DecodedURL
        return cls.from_text(s, lazy=lazy)

    def asText(self, includeSecrets=False):
        # type: (bool) -> Text
        return self.to_text(with_password=includeSecrets)

    def __dir__(self):
        # type: () -> Sequence[Text]
        try:
            ret = object.__dir__(self)
        except AttributeError:
            # object.__dir__ == AttributeError  # pdw for py2
            ret = dir(self.__class__) + list(self.__dict__.keys())
        # Hide the camelCase compat aliases from the listing.
        ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))
        return ret

    # # End Twisted Compat Code

2421 

2422 

# Add some overloads so that parse gives a better return value.
@overload
def parse(url, decoded, lazy=False):
    # type: (Text, Literal[False], bool) -> URL
    """With ``decoded=False`` the result is a URL."""


@overload
def parse(url, decoded=True, lazy=False):
    # type: (Text, Literal[True], bool) -> DecodedURL
    """With ``decoded=True`` (also the default) the result is a DecodedURL."""


@overload
def parse(url, decoded=True, lazy=False):
    # type: (Text, bool, bool) -> Union[URL, DecodedURL]
    """When ``decoded`` is not a literal the return type is unknown."""


def parse(url, decoded=True, lazy=False):
    # type: (Text, bool, bool) -> Union[URL, DecodedURL]
    """
    Automatically turn text into a structured URL object.

    >>> url = parse(u"https://github.com/python-hyper/hyperlink")
    >>> print(url.to_text())
    https://github.com/python-hyper/hyperlink

    Args:
        url: A text string representation of a URL.

        decoded: Whether or not to return a :class:`DecodedURL`,
            which automatically handles all
            encoding/decoding/quoting/unquoting for all the various
            accessors of parts of the URL, or a :class:`URL`,
            which has the same API, but requires handling of special
            characters for different parts of the URL.

        lazy: In the case of `decoded=True`, this controls
            whether the URL is decoded immediately or as accessed. The
            default, `lazy=False`, checks all encoded parts of the URL
            for decodability.

    .. versionadded:: 18.0.0
    """
    encoded = EncodedURL.from_text(url)
    if decoded:
        # Wrap the encoded URL; *lazy* only matters on this path.
        return DecodedURL(encoded, lazy=lazy)
    return encoded