Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/httpx/_urls.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

194 statements  

1from __future__ import annotations 

2 

3import typing 

4from urllib.parse import parse_qs, unquote, urlencode 

5 

6import idna 

7 

8from ._types import QueryParamTypes 

9from ._urlparse import urlparse 

10from ._utils import primitive_value_to_str 

11 

12__all__ = ["URL", "QueryParams"] 

13 

14 

class URL:
    """
    A parsed URL, exposing its individual components as properties.

      url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")

      assert url.scheme == "https"
      assert url.username == "jo@email.com"
      assert url.password == "a secret"
      assert url.userinfo == b"jo%40email.com:a%20secret"
      assert url.host == "müller.de"
      assert url.raw_host == b"xn--mller-kva.de"
      assert url.port == 1234
      assert url.netloc == b"xn--mller-kva.de:1234"
      assert url.path == "/pa th"
      assert url.query == b"search=ab"
      assert url.raw_path == b"/pa%20th?search=ab"
      assert url.fragment == "anchorlink"

    The components of a URL are broken down like this:

       https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink
    [scheme]   [  username  ] [password] [ host ][port][ path ] [ query ] [fragment]
               [       userinfo        ] [   netloc   ][    raw_path    ]

    Note that:

    * `url.scheme` is normalized to always be lowercased.

    * `url.host` is normalized to always be lowercased. Internationalized domain
      names are represented in unicode, without IDNA encoding applied. For instance:

      url = httpx.URL("http://中国.icom.museum")
      assert url.host == "中国.icom.museum"
      url = httpx.URL("http://xn--fiqs8s.icom.museum")
      assert url.host == "中国.icom.museum"

    * `url.raw_host` is normalized to always be lowercased, and is IDNA encoded.

      url = httpx.URL("http://中国.icom.museum")
      assert url.raw_host == b"xn--fiqs8s.icom.museum"
      url = httpx.URL("http://xn--fiqs8s.icom.museum")
      assert url.raw_host == b"xn--fiqs8s.icom.museum"

    * `url.port` is either None or an integer. URLs that include the default port for
      "http", "https", "ws", "wss", and "ftp" schemes have their port
      normalized to `None`.

      assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80")
      assert httpx.URL("http://example.com").port is None
      assert httpx.URL("http://example.com:80").port is None

    * `url.userinfo` is raw bytes, with no URL decoding applied (it is still
      percent-encoded). Usually you'll want to work with `url.username` and
      `url.password` instead, which handle the URL decoding.

    * `url.raw_path` is raw bytes of both the path and query, with no URL decoding
      applied. This portion is used as the target when constructing HTTP requests.
      Usually you'll want to work with `url.path` instead.

    * `url.query` is raw bytes, with no URL decoding applied. A URL query string
      portion can only be properly URL decoded once the parameter names and values
      themselves have been split apart.
    """

    def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None:
        """
        Build a URL from a string or from an existing `URL`, optionally
        overriding individual components through keyword arguments.

        Raises `TypeError` for an unknown keyword argument, for a keyword
        argument of the wrong type, or when `url` is neither `str` nor `URL`.
        """
        if kwargs:
            # Each supported component, mapped to the type it must be given as.
            allowed = {
                "scheme": str,
                "username": str,
                "password": str,
                "userinfo": bytes,
                "host": str,
                "port": int,
                "netloc": bytes,
                "path": str,
                "query": bytes,
                "raw_path": bytes,
                "fragment": str,
                "params": object,
            }

            # Validate every keyword argument, and normalize bytes to str.
            # Iterating over a snapshot keeps the in-loop reassignment explicit.
            for name, supplied in list(kwargs.items()):
                if name not in allowed:
                    raise TypeError(
                        f"{name!r} is an invalid keyword argument for URL()"
                    )
                wanted_type = allowed[name]
                if supplied is not None and not isinstance(supplied, wanted_type):
                    expected = wanted_type.__name__
                    seen = type(supplied).__name__
                    raise TypeError(
                        f"Argument {name!r} must be {expected} but got {seen}"
                    )
                if isinstance(supplied, bytes):
                    kwargs[name] = supplied.decode("ascii")

            if "params" in kwargs:
                # "params" is sugar for the raw "query" component.
                #
                # Empty params must become `query=None` rather than `query=""`,
                # so that the generated URL carries no dangling trailing "?".
                params = kwargs.pop("params")
                kwargs["query"] = str(QueryParams(params)) if params else None

        if isinstance(url, str):
            self._uri_reference = urlparse(url, **kwargs)
            return

        if isinstance(url, URL):
            self._uri_reference = url._uri_reference.copy_with(**kwargs)
            return

        raise TypeError(
            "Invalid type for url. Expected str or httpx.URL,"
            f" got {type(url)}: {url!r}"
        )

    @property
    def scheme(self) -> str:
        """
        The URL scheme, such as "http", "https".
        Always normalised to lowercase.
        """
        parsed = self._uri_reference
        return parsed.scheme

    @property
    def raw_scheme(self) -> bytes:
        """
        The raw bytes representation of the URL scheme, such as b"http", b"https".
        Always normalised to lowercase.
        """
        scheme_text = self._uri_reference.scheme
        return scheme_text.encode("ascii")

    @property
    def userinfo(self) -> bytes:
        """
        The URL userinfo as a raw bytestring.
        For example: b"jo%40email.com:a%20secret".
        """
        userinfo_text = self._uri_reference.userinfo
        return userinfo_text.encode("ascii")

    @property
    def username(self) -> str:
        """
        The URL username as a string, with URL decoding applied.
        For example: "jo@email.com"
        """
        # Everything before the first ":" of the userinfo is the username.
        encoded_name, _, _ = self._uri_reference.userinfo.partition(":")
        return unquote(encoded_name)

    @property
    def password(self) -> str:
        """
        The URL password as a string, with URL decoding applied.
        For example: "a secret"
        """
        # Everything after the first ":" of the userinfo is the password.
        _, _, encoded_secret = self._uri_reference.userinfo.partition(":")
        return unquote(encoded_secret)

    @property
    def host(self) -> str:
        """
        The URL host as a string.
        Always normalized to lowercase, with IDNA hosts decoded into unicode.

        Examples:

        url = httpx.URL("http://www.EXAMPLE.org")
        assert url.host == "www.example.org"

        url = httpx.URL("http://中国.icom.museum")
        assert url.host == "中国.icom.museum"

        url = httpx.URL("http://xn--fiqs8s.icom.museum")
        assert url.host == "中国.icom.museum"

        url = httpx.URL("https://[::ffff:192.168.0.1]")
        assert url.host == "::ffff:192.168.0.1"
        """
        stored: str = self._uri_reference.host

        # NOTE(review): decoding is only attempted when the *first* label is
        # punycode; a host such as "www.xn--..." is returned undecoded.
        # Confirm whether that is intended.
        if not stored.startswith("xn--"):
            return stored
        return idna.decode(stored)

    @property
    def raw_host(self) -> bytes:
        """
        The raw bytes representation of the URL host.
        Always normalized to lowercase, and IDNA encoded.

        Examples:

        url = httpx.URL("http://www.EXAMPLE.org")
        assert url.raw_host == b"www.example.org"

        url = httpx.URL("http://中国.icom.museum")
        assert url.raw_host == b"xn--fiqs8s.icom.museum"

        url = httpx.URL("http://xn--fiqs8s.icom.museum")
        assert url.raw_host == b"xn--fiqs8s.icom.museum"

        url = httpx.URL("https://[::ffff:192.168.0.1]")
        assert url.raw_host == b"::ffff:192.168.0.1"
        """
        host_text = self._uri_reference.host
        return host_text.encode("ascii")

    @property
    def port(self) -> int | None:
        """
        The URL port as an integer.

        Note that the URL class performs port normalization as per the WHATWG spec.
        Default ports for "http", "https", "ws", "wss", and "ftp" schemes are always
        treated as `None`.

        For example:

        assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80")
        assert httpx.URL("http://www.example.com:80").port is None
        """
        parsed = self._uri_reference
        return parsed.port

    @property
    def netloc(self) -> bytes:
        """
        Either `<host>` or `<host>:<port>` as bytes.
        Always normalized to lowercase, and IDNA encoded.

        This property may be used for generating the value of a request
        "Host" header.
        """
        netloc_text = self._uri_reference.netloc
        return netloc_text.encode("ascii")

    @property
    def path(self) -> str:
        """
        The URL path as a string. Excluding the query string, and URL decoded.
        An empty path is reported as "/".

        For example:

        url = httpx.URL("https://example.com/pa%20th")
        assert url.path == "/pa th"
        """
        return unquote(self._uri_reference.path or "/")

    @property
    def query(self) -> bytes:
        """
        The URL query string, as raw bytes, excluding the leading b"?".

        This is necessarily a bytewise interface, because we cannot
        perform URL decoding of this representation until we've parsed
        the keys and values into a QueryParams instance.

        For example:

        url = httpx.URL("https://example.com/?filter=some%20search%20terms")
        assert url.query == b"filter=some%20search%20terms"
        """
        # A missing query (None) is reported as empty bytes.
        return (self._uri_reference.query or "").encode("ascii")

    @property
    def params(self) -> QueryParams:
        """
        The URL query parameters, neatly parsed and packaged into an immutable
        multidict representation.
        """
        raw_query = self._uri_reference.query
        return QueryParams(raw_query)

    @property
    def raw_path(self) -> bytes:
        """
        The complete URL path and query string as raw bytes.
        Used as the target when constructing HTTP requests.

        For example:

        GET /users?search=some%20text HTTP/1.1
        Host: www.example.org
        Connection: close
        """
        parsed = self._uri_reference
        target = parsed.path or "/"
        # An empty-but-present query still contributes its "?" separator.
        if parsed.query is not None:
            target = target + "?" + parsed.query
        return target.encode("ascii")

    @property
    def fragment(self) -> str:
        """
        The URL fragments, as used in HTML anchors.
        As a string, without the leading '#'.
        """
        encoded_fragment = self._uri_reference.fragment or ""
        return unquote(encoded_fragment)

    @property
    def is_absolute_url(self) -> bool:
        """
        Return `True` for absolute URLs such as 'http://example.com/path',
        and `False` for relative URLs such as '/path'.
        """
        # We don't use `.is_absolute` from `rfc3986` because it treats
        # URLs with a fragment portion as not absolute.
        # What we actually care about is if the URL provides
        # a scheme and hostname to which connections should be made.
        parsed = self._uri_reference
        if not parsed.scheme:
            return False
        return bool(parsed.host)

    @property
    def is_relative_url(self) -> bool:
        """
        Return `False` for absolute URLs such as 'http://example.com/path',
        and `True` for relative URLs such as '/path'.
        """
        absolute = self.is_absolute_url
        return not absolute

    def copy_with(self, **kwargs: typing.Any) -> URL:
        """
        Copy this URL, returning a new URL with some components altered.
        Accepts the same set of parameters as the components that are made
        available via properties on the `URL` class.

        For example:

        url = httpx.URL("https://www.example.com").copy_with(
            username="jo@email.com", password="a secret"
        )
        assert url == "https://jo%40email.com:a%20secret@www.example.com"
        """
        return URL(self, **kwargs)

    def copy_set_param(self, key: str, value: typing.Any = None) -> URL:
        """Return a copy of this URL with query parameter `key` set to `value`."""
        updated = self.params.set(key, value)
        return self.copy_with(params=updated)

    def copy_add_param(self, key: str, value: typing.Any = None) -> URL:
        """Return a copy of this URL with `value` appended to query parameter `key`."""
        updated = self.params.add(key, value)
        return self.copy_with(params=updated)

    def copy_remove_param(self, key: str) -> URL:
        """Return a copy of this URL with query parameter `key` removed."""
        updated = self.params.remove(key)
        return self.copy_with(params=updated)

    def copy_merge_params(self, params: QueryParamTypes) -> URL:
        """Return a copy of this URL with `params` merged into its query parameters."""
        updated = self.params.merge(params)
        return self.copy_with(params=updated)

    def join(self, url: URL | str) -> URL:
        """
        Return an absolute URL, using this URL as the base.

        Eg.

        url = httpx.URL("https://www.example.com/test")
        url = url.join("/new/path")
        assert url == "https://www.example.com/new/path"
        """
        from urllib.parse import urljoin

        base = str(self)
        # Round-tripping through URL() validates and normalizes the reference.
        reference = str(URL(url))
        return URL(urljoin(base, reference))

    def __hash__(self) -> int:
        return hash(str(self))

    def __eq__(self, other: typing.Any) -> bool:
        # Only URLs and strings are comparable; strings are parsed first so
        # that the comparison happens on the normalized form.
        if not isinstance(other, (URL, str)):
            return False
        return str(self) == str(URL(other))

    def __str__(self) -> str:
        return str(self._uri_reference)

    def __repr__(self) -> str:
        scheme, userinfo, host, port, path, query, fragment = self._uri_reference

        if ":" in userinfo:
            # Mask any password component.
            userinfo = userinfo.partition(":")[0] + ":[secure]"

        authority = ""
        if userinfo:
            authority += f"{userinfo}@"
        # Hosts containing ":" (IPv6 literals) are shown in brackets.
        authority += f"[{host}]" if ":" in host else host
        if port is not None:
            authority += f":{port}"

        rendered = ""
        if scheme:
            rendered += f"{self.scheme}:"
        if authority:
            rendered += f"//{authority}"
        rendered += path
        if query is not None:
            rendered += f"?{query}"
        if fragment is not None:
            rendered += f"#{fragment}"

        return f"{self.__class__.__name__}({rendered!r})"

    @property
    def raw(self) -> tuple[bytes, bytes, int, bytes]:  # pragma: nocover
        """
        Deprecated: the `(raw_scheme, raw_host, port, raw_path)` tuple.

        Accessing this property emits a warning.
        """
        import collections
        import warnings

        warnings.warn("URL.raw is deprecated.")
        field_names = ["raw_scheme", "raw_host", "port", "raw_path"]
        RawURL = collections.namedtuple("RawURL", field_names)
        return RawURL(self.raw_scheme, self.raw_host, self.port, self.raw_path)

418 

419 

class QueryParams(typing.Mapping[str, str]):
    """
    URL query parameters, as an immutable multi-dict.

    Internally the parameters are held in `self._dict`, mapping each key to
    the list of values given for it. The plain mapping interface (`[]`,
    `get`, `values`, `items`) exposes only the first value of each key; use
    `get_list` or `multi_items` to see every value.
    """

    def __init__(self, *args: QueryParamTypes | None, **kwargs: typing.Any) -> None:
        """
        Build the parameters from at most one positional argument *or* from
        keyword arguments, never both.

        The positional argument may be `None`, a query string (`str` or
        ASCII `bytes`), another `QueryParams`, a list/tuple of `(key, value)`
        pairs, or a mapping whose values are single items or lists/tuples.
        """
        # Kept as asserts so that misuse keeps raising AssertionError.
        assert len(args) < 2, "Too many arguments."
        assert not (args and kwargs), "Cannot mix named and unnamed arguments."

        value = args[0] if args else kwargs

        if value is None or isinstance(value, (str, bytes)):
            value = value.decode("ascii") if isinstance(value, bytes) else value
            self._dict = parse_qs(value, keep_blank_values=True)
        elif isinstance(value, QueryParams):
            # Copy the value lists so the two instances never share them.
            self._dict = {k: list(v) for k, v in value._dict.items()}
        else:
            dict_value: dict[typing.Any, list[typing.Any]] = {}
            if isinstance(value, (list, tuple)):
                # Convert list inputs like:
                #     [("a", "123"), ("a", "456"), ("b", "789")]
                # To a dict representation, like:
                #     {"a": ["123", "456"], "b": ["789"]}
                for item in value:
                    dict_value.setdefault(item[0], []).append(item[1])
            else:
                # Convert dict inputs like:
                #     {"a": "123", "b": ["456", "789"]}
                # To dict inputs where values are always lists, like:
                #     {"a": ["123"], "b": ["456", "789"]}
                dict_value = {
                    k: list(v) if isinstance(v, (list, tuple)) else [v]
                    for k, v in value.items()
                }

            # Ensure that keys and values are neatly coerced to strings.
            # We coerce values `True` and `False` to JSON-like "true" and "false"
            # representations, and coerce `None` values to the empty string.
            self._dict = {
                str(k): [primitive_value_to_str(item) for item in v]
                for k, v in dict_value.items()
            }

    def keys(self) -> typing.KeysView[str]:
        """
        Return all the keys in the query params.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert list(q.keys()) == ["a", "b"]
        """
        return self._dict.keys()

    def values(self) -> typing.ValuesView[str]:
        """
        Return all the values in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert list(q.values()) == ["123", "789"]
        """
        return {k: v[0] for k, v in self._dict.items()}.values()

    def items(self) -> typing.ItemsView[str, str]:
        """
        Return all items in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert list(q.items()) == [("a", "123"), ("b", "789")]
        """
        return {k: v[0] for k, v in self._dict.items()}.items()

    def multi_items(self) -> list[tuple[str, str]]:
        """
        Return all items in the query params. Allow duplicate keys to occur.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert list(q.multi_items()) == [("a", "123"), ("a", "456"), ("b", "789")]
        """
        return [(k, item) for k, v in self._dict.items() for item in v]

    def get(self, key: typing.Any, default: typing.Any = None) -> typing.Any:
        """
        Get a value from the query param for a given key. If the key occurs
        more than once, then only the first value is returned.

        The key is coerced to `str` before the lookup, matching `get_list`.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert q.get("a") == "123"
        """
        # Coerce once, so the membership test and the lookup use the same key.
        key = str(key)
        if key in self._dict:
            return self._dict[key][0]
        return default

    def get_list(self, key: str) -> list[str]:
        """
        Get all values from the query param for a given key.

        Returns a fresh list; an unknown key gives an empty list.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert q.get_list("a") == ["123", "456"]
        """
        return list(self._dict.get(str(key), []))

    def set(self, key: str, value: typing.Any = None) -> QueryParams:
        """
        Return a new QueryParams instance, setting the value of a key.

        Usage:

        q = httpx.QueryParams("a=123")
        q = q.set("a", "456")
        assert q == httpx.QueryParams("a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict[str(key)] = [primitive_value_to_str(value)]
        return q

    def add(self, key: str, value: typing.Any = None) -> QueryParams:
        """
        Return a new QueryParams instance, setting or appending the value of a key.

        Usage:

        q = httpx.QueryParams("a=123")
        q = q.add("a", "456")
        assert q == httpx.QueryParams("a=123&a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        # `get_list` returns a copy, so this instance's own list is untouched.
        q._dict[str(key)] = q.get_list(key) + [primitive_value_to_str(value)]
        return q

    def remove(self, key: str) -> QueryParams:
        """
        Return a new QueryParams instance, removing the value of a key.
        Removing a key that is not present is not an error.

        Usage:

        q = httpx.QueryParams("a=123")
        q = q.remove("a")
        assert q == httpx.QueryParams("")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict.pop(str(key), None)
        return q

    def merge(self, params: QueryParamTypes | None = None) -> QueryParams:
        """
        Return a new QueryParams instance, updated with the given params.
        Where a key is present on both sides, the values from `params`
        replace the existing ones.

        Usage:

        q = httpx.QueryParams("a=123")
        q = q.merge({"b": "456"})
        assert q == httpx.QueryParams("a=123&b=456")

        q = httpx.QueryParams("a=123")
        q = q.merge({"a": "456", "b": "789"})
        assert q == httpx.QueryParams("a=456&b=789")
        """
        q = QueryParams(params)
        q._dict = {**self._dict, **q._dict}
        return q

    def __getitem__(self, key: typing.Any) -> str:
        return self._dict[key][0]

    def __contains__(self, key: typing.Any) -> bool:
        return key in self._dict

    def __iter__(self) -> typing.Iterator[typing.Any]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self._dict)

    def __bool__(self) -> bool:
        return bool(self._dict)

    def __hash__(self) -> int:
        # Equality ignores item order, so the hash must ignore it too:
        # objects that compare equal are required to hash equal.
        return hash(tuple(sorted(self.multi_items())))

    def __eq__(self, other: typing.Any) -> bool:
        # Order-insensitive comparison of every (key, value) pair.
        if not isinstance(other, self.__class__):
            return False
        return sorted(self.multi_items()) == sorted(other.multi_items())

    def __str__(self) -> str:
        return urlencode(self.multi_items())

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        query_string = str(self)
        return f"{class_name}({query_string!r})"

    def update(self, params: QueryParamTypes | None = None) -> None:
        """Always raises `RuntimeError`: instances are immutable."""
        raise RuntimeError(
            "QueryParams are immutable since 0.18.0. "
            "Use `q = q.merge(...)` to create an updated copy."
        )

    def __setitem__(self, key: str, value: str) -> None:
        raise RuntimeError(
            "QueryParams are immutable since 0.18.0. "
            "Use `q = q.set(key, value)` to create an updated copy."
        )