1from __future__ import annotations
2
3import base64
4import ssl
5import typing
6import urllib.parse
7
8# Functions for typechecking...
9
10
# A value that may be supplied either as raw bytes or as a string.
ByteOrStr = typing.Union[bytes, str]
# Headers supplied as a sequence of (name, value) two-tuples.
HeadersAsSequence = typing.Sequence[typing.Tuple[ByteOrStr, ByteOrStr]]
# Headers supplied as a mapping of name to value.
HeadersAsMapping = typing.Mapping[ByteOrStr, ByteOrStr]
# Any accepted form of headers. `None` is accepted and means "no headers".
HeaderTypes = typing.Union[HeadersAsSequence, HeadersAsMapping, None]

# Mutable mapping of string keys to arbitrary extra information that is
# attached to a request or a response.
Extensions = typing.MutableMapping[str, typing.Any]
17
18
def enforce_bytes(value: bytes | str, *, name: str) -> bytes:
    """
    Coerce an argument that ends up on the wire as bytes into `bytes`.

    Bytes are passed through untouched. Strings are accepted as a
    convenience, but only when every character is in the plain ASCII range
    chr(0)...chr(127); anything beyond that range has to be encoded by the
    caller and passed as bytes, so that the encoding is never guessed.

    Parameters:
        value: The bytes or string to coerce.
        name: Label for the argument, used in error messages.

    Raises:
        TypeError: If `value` is a string containing non-ASCII characters,
            or is neither bytes nor str.
    """
    if isinstance(value, bytes):
        return value

    if not isinstance(value, str):
        type_name = type(value).__name__
        raise TypeError(f"{name} must be bytes or str, but got {type_name}.")

    try:
        encoded = value.encode("ascii")
    except UnicodeEncodeError:
        raise TypeError(f"{name} strings may not include unicode characters.")
    return encoded
38
39
def enforce_url(value: URL | bytes | str, *, name: str) -> URL:
    """
    Coerce a URL parameter into a `URL` instance.

    An existing `URL` is returned as-is; bytes and strings are handed to the
    `URL` constructor.

    Parameters:
        value: The `URL`, bytes, or string to coerce.
        name: Label for the argument, used in the error message.

    Raises:
        TypeError: If `value` is not a `URL`, bytes, or str.
    """
    if isinstance(value, URL):
        return value

    if isinstance(value, (bytes, str)):
        return URL(value)

    type_name = type(value).__name__
    raise TypeError(f"{name} must be a URL, bytes, or str, but got {type_name}.")
51
52
def enforce_headers(
    value: HeadersAsMapping | HeadersAsSequence | None = None, *, name: str
) -> list[tuple[bytes, bytes]]:
    """
    Convenience function that normalises request or response headers into a
    list of `(name, value)` byte pairs.

    Every header name and value must be either bytes or a string in the plain
    ASCII range.

    Parameters:
        value: The headers, as a mapping, a sequence of two-tuples, or `None`
            for no headers.
        name: Label for the argument, used in the error message.

    Raises:
        TypeError: If `value` is neither a mapping nor a sequence, or if a
            header name or value is rejected by `enforce_bytes`.
    """
    if value is None:
        return []

    # Mappings are checked first: the pairs come from `.items()` rather than
    # from iterating the container directly.
    if isinstance(value, typing.Mapping):
        pairs = value.items()
    elif isinstance(value, typing.Sequence):
        pairs = value
    else:
        type_name = type(value).__name__
        raise TypeError(
            f"{name} must be a mapping or sequence of two-tuples, but got {type_name}."
        )

    return [
        (
            enforce_bytes(key, name="header name"),
            enforce_bytes(val, name="header value"),
        )
        for key, val in pairs
    ]
83
84
def enforce_stream(
    value: bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes] | None,
    *,
    name: str,
) -> typing.Iterable[bytes] | typing.AsyncIterable[bytes]:
    """
    Normalise body content into something that can be iterated as a stream.

    `None` becomes an empty `ByteStream`, plain bytes are wrapped in a
    `ByteStream`, and any other value is returned unchanged.

    Parameters:
        value: The body content.
        name: Label for the argument. Not used by this function.
    """
    if isinstance(value, bytes):
        return ByteStream(value)
    if value is None:
        return ByteStream(b"")
    # No validation is done here: anything else is passed straight through.
    return value
95
96
# Default port for each URL scheme, keyed by the scheme as bytes.
# `include_request_headers` uses this table to decide whether the port can be
# left out of the `Host` header.
#
# * https://tools.ietf.org/html/rfc3986#section-3.2.3
# * https://url.spec.whatwg.org/#url-miscellaneous
# * https://url.spec.whatwg.org/#scheme-state
DEFAULT_PORTS = {
    b"ftp": 21,
    b"http": 80,
    b"https": 443,
    b"ws": 80,
    b"wss": 443,
}
107
108
def include_request_headers(
    headers: list[tuple[bytes, bytes]],
    *,
    url: "URL",
    content: None | bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes],
) -> list[tuple[bytes, bytes]]:
    """
    Return a new header list with any missing default request headers added.

    * A `Host` header is inserted at the front when none is present. The port
      is included only when the URL has one and it differs from the default
      port for the URL scheme.
    * When `content` is not `None` and neither `Content-Length` nor
      `Transfer-Encoding` is present, a `Content-Length` header is appended
      for bytes content, and `Transfer-Encoding: chunked` for anything else.

    Header names are matched case-insensitively. The `headers` argument is
    never modified; the result is always a separate list.

    Parameters:
        headers: The existing request headers as `(name, value)` byte pairs.
        url: The request URL; only read when a `Host` header has to be built.
        content: The request body, or `None` when there is no body.

    Returns:
        The complete list of request headers.
    """
    headers_set = {key.lower() for key, _ in headers}

    # Work on a copy so the caller's list is left untouched. Extending the
    # argument in place would leak the defaults back into the caller's list
    # whenever a Host header was already present.
    headers = list(headers)

    if b"host" not in headers_set:
        default_port = DEFAULT_PORTS.get(url.scheme)
        if url.port is None or url.port == default_port:
            header_value = url.host
        else:
            header_value = b"%b:%d" % (url.host, url.port)
        headers.insert(0, (b"Host", header_value))

    if (
        content is not None
        and b"content-length" not in headers_set
        and b"transfer-encoding" not in headers_set
    ):
        if isinstance(content, bytes):
            content_length = str(len(content)).encode("ascii")
            headers.append((b"Content-Length", content_length))
        else:
            headers.append((b"Transfer-Encoding", b"chunked"))  # pragma: nocover

    return headers
137
138
139# Interfaces for byte streams...
140
141
class ByteStream:
    """
    Wraps a single in-memory bytes payload so it can be consumed through
    either the sync or the async stream iteration protocol.

    Each iteration yields the whole payload as one chunk.
    """

    def __init__(self, content: bytes) -> None:
        self._content = content

    def __iter__(self) -> typing.Iterator[bytes]:
        # Sync iteration: the entire payload in a single chunk.
        yield self._content

    async def __aiter__(self) -> typing.AsyncIterator[bytes]:
        # Async iteration: the entire payload in a single chunk.
        yield self._content

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        size = len(self._content)
        return f"<{class_name} [{size} bytes]>"
159
160
class Origin:
    """
    The (scheme, host, port) triple of a URL.

    Two origins compare equal when all three components are equal.
    """

    def __init__(self, scheme: bytes, host: bytes, port: int) -> None:
        self.scheme = scheme
        self.host = host
        self.port = port

    def __eq__(self, other: typing.Any) -> bool:
        if not isinstance(other, Origin):
            return False
        mine = (self.scheme, self.host, self.port)
        theirs = (other.scheme, other.host, other.port)
        return mine == theirs

    def __str__(self) -> str:
        # Rendered as "scheme://host:port", always including the port.
        return (
            f"{self.scheme.decode('ascii')}://"
            f"{self.host.decode('ascii')}:{self.port!s}"
        )
180
181
class URL:
    """
    Represents the URL against which an HTTP request may be made.

    The URL may either be specified as a plain string, for convenience:

    ```python
    url = httpcore.URL("https://www.example.com/")
    ```

    Or be constructed with explicitly pre-parsed components:

    ```python
    url = httpcore.URL(scheme=b'https', host=b'www.example.com', port=None, target=b'/')
    ```

    Using this second more explicit style allows integrations that are using
    `httpcore` to pass through URLs that have already been parsed in order to use
    libraries such as `rfc-3986` rather than relying on the stdlib. It also ensures
    that URL parsing is treated identically at both the networking level and at any
    higher layers of abstraction.

    The four components are important here, as they allow the URL to be precisely
    specified in a pre-parsed format. They also allow certain types of request to
    be created that could not otherwise be expressed.

    For example, an HTTP request to `http://www.example.com/` forwarded via a proxy
    at `http://localhost:8080`...

    ```python
    # Constructs an HTTP request with a complete URL as the target:
    # GET http://www.example.com/ HTTP/1.1
    url = httpcore.URL(
        scheme=b'http',
        host=b'localhost',
        port=8080,
        target=b'http://www.example.com/'
    )
    request = httpcore.Request(
        method="GET",
        url=url
    )
    ```

    Another example is constructing an `OPTIONS *` request...

    ```python
    # Constructs an 'OPTIONS *' HTTP request:
    # OPTIONS * HTTP/1.1
    url = httpcore.URL(scheme=b'https', host=b'www.example.com', target=b'*')
    request = httpcore.Request(method="OPTIONS", url=url)
    ```

    This kind of request is not possible to formulate with a URL string,
    because the `/` delimiter is always used to demarcate the target from the
    host/port portion of the URL.

    For convenience, string-like arguments may be specified either as strings or
    as bytes. However, once a request is being issued over-the-wire, the URL
    components are always ultimately required to be a bytewise representation.

    In order to avoid any ambiguity over character encodings, when strings are used
    as arguments, they must be strictly limited to the ASCII range `chr(0)`-`chr(127)`.
    If you require a bytewise representation that is outside this range you must
    handle the character encoding directly, and pass a bytes instance.
    """

    # Default ports used by `origin` when the URL carries no explicit port.
    _ORIGIN_DEFAULT_PORTS: typing.ClassVar[dict[bytes, int]] = {
        b"http": 80,
        b"https": 443,
        b"ws": 80,
        b"wss": 443,
        b"socks5": 1080,
        b"socks5h": 1080,
    }

    def __init__(
        self,
        url: bytes | str = "",
        *,
        scheme: bytes | str = b"",
        host: bytes | str = b"",
        port: int | None = None,
        target: bytes | str = b"",
    ) -> None:
        """
        Parameters:
            url: The complete URL as a string or bytes. When given (and
                non-empty) it takes precedence and the component arguments
                are ignored.
            scheme: The URL scheme as a string or bytes.
                Typically either `"http"` or `"https"`.
            host: The URL host as a string or bytes. Such as `"www.example.com"`.
            port: The port to connect to. Either an integer or `None`.
            target: The target of the HTTP request. Such as `"/items?search=red"`.

        Raises:
            TypeError: If a string argument contains non-ASCII characters, or
                an argument is neither bytes nor str.
        """
        if url:
            parsed = self._parse_url(enforce_bytes(url, name="url"))
            self.scheme, self.host, self.port, self.target = parsed
        else:
            self.scheme = enforce_bytes(scheme, name="scheme")
            self.host = enforce_bytes(host, name="host")
            self.port = port
            self.target = enforce_bytes(target, name="target")

    @staticmethod
    def _parse_url(url: bytes) -> tuple[bytes, bytes, int | None, bytes]:
        """
        Split a complete bytes URL into `(scheme, host, port, target)`.

        The target is the path (defaulting to `/`) followed by `?query` when
        a query is present. The fragment is not part of the target.
        """
        # `urlsplit` is used rather than `urlparse`: `urlparse` additionally
        # strips `;params` from the last path segment, which would silently
        # drop them from the request target.
        parts = urllib.parse.urlsplit(url)
        target = parts.path or b"/"
        if parts.query:
            target += b"?" + parts.query
        return parts.scheme, parts.hostname or b"", parts.port, target

    @property
    def origin(self) -> Origin:
        """
        The `Origin` (scheme, host, port) of this URL.

        When the URL has no port, the default port for its scheme is used.

        Raises:
            KeyError: If the URL has no port and its scheme has no known
                default port.
        """
        # The defaults are only consulted when no port was given, so a URL
        # with an explicit port never fails because of its scheme.
        port = self.port or self._ORIGIN_DEFAULT_PORTS[self.scheme]
        return Origin(scheme=self.scheme, host=self.host, port=port)

    def __eq__(self, other: typing.Any) -> bool:
        return (
            isinstance(other, URL)
            and other.scheme == self.scheme
            and other.host == self.host
            and other.port == self.port
            and other.target == self.target
        )

    def __bytes__(self) -> bytes:
        # The port is only rendered when one is set.
        if self.port is None:
            return b"%b://%b%b" % (self.scheme, self.host, self.target)
        return b"%b://%b:%d%b" % (self.scheme, self.host, self.port, self.target)

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(scheme={self.scheme!r}, "
            f"host={self.host!r}, port={self.port!r}, target={self.target!r})"
        )
314
315
class Request:
    """
    An HTTP request.
    """

    def __init__(
        self,
        method: bytes | str,
        url: URL | bytes | str,
        *,
        headers: HeaderTypes = None,
        content: bytes
        | typing.Iterable[bytes]
        | typing.AsyncIterable[bytes]
        | None = None,
        extensions: Extensions | None = None,
    ) -> None:
        """
        Parameters:
            method: The HTTP request method, either as a string or bytes.
                For example: `GET`.
            url: The request URL, either as a `URL` instance, or as a string or bytes.
                For example: `"https://www.example.com".`
            headers: The HTTP request headers.
            content: The content of the request body.
            extensions: A dictionary of optional extra information included on
                the request. Possible keys include `"timeout"`, and `"trace"`.
                A `"target"` key, when present, replaces the target of `url`.
        """
        self.method: bytes = enforce_bytes(method, name="method")
        self.url: URL = enforce_url(url, name="url")
        self.headers: list[tuple[bytes, bytes]] = enforce_headers(
            headers, name="headers"
        )
        self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = (
            enforce_stream(content, name="content")
        )

        if extensions is None:
            extensions = {}
        self.extensions = extensions

        # A "target" extension overrides the request target, while the
        # scheme, host and port of the given URL are kept.
        if "target" in self.extensions:
            base = self.url
            self.url = URL(
                scheme=base.scheme,
                host=base.host,
                port=base.port,
                target=self.extensions["target"],
            )

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"<{class_name} [{self.method!r}]>"
364
365
class Response:
    """
    An HTTP response.
    """

    def __init__(
        self,
        status: int,
        *,
        headers: HeaderTypes = None,
        content: bytes
        | typing.Iterable[bytes]
        | typing.AsyncIterable[bytes]
        | None = None,
        extensions: Extensions | None = None,
    ) -> None:
        """
        Parameters:
            status: The HTTP status code of the response. For example `200`.
            headers: The HTTP response headers.
            content: The content of the response body.
            extensions: A dictionary of optional extra information included on
                the response. Possible keys include `"http_version"`,
                `"reason_phrase"`, and `"network_stream"`.
        """
        self.status: int = status
        self.headers: list[tuple[bytes, bytes]] = enforce_headers(
            headers, name="headers"
        )
        self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = (
            enforce_stream(content, name="content")
        )

        if extensions is None:
            extensions = {}
        self.extensions = extensions

        # Set once the body stream has started being iterated; the stream
        # may only be iterated a single time.
        self._stream_consumed = False

    @property
    def content(self) -> bytes:
        """
        The response body, available only after `read()` or `aread()`.

        Raises:
            RuntimeError: If the body has not been read yet.
        """
        if hasattr(self, "_content"):
            return self._content

        # Point the caller at whichever read method matches the stream.
        if isinstance(self.stream, typing.Iterable):
            hint = "Call 'response.read()' first."
        else:
            hint = "Call 'await response.aread()' first."
        raise RuntimeError(
            "Attempted to access 'response.content' on a streaming response. " + hint
        )

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"<{class_name} [{self.status}]>"

    # Sync interface...

    def read(self) -> bytes:
        """
        Read the whole body from a sync stream, store it, and return it.

        Later calls return the stored body without touching the stream.
        """
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to read an asynchronous response using 'response.read()'. "
                "You should use 'await response.aread()' instead."
            )
        if not hasattr(self, "_content"):
            self._content = b"".join(self.iter_stream())
        return self._content

    def iter_stream(self) -> typing.Iterator[bytes]:
        """
        Iterate over the chunks of a sync body stream. May only be used once.
        """
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to stream an asynchronous response using 'for ... in "
                "response.iter_stream()'. "
                "You should use 'async for ... in response.aiter_stream()' instead."
            )
        if self._stream_consumed:
            raise RuntimeError(
                "Attempted to call 'for ... in response.iter_stream()' more than once."
            )
        self._stream_consumed = True
        for part in self.stream:
            yield part

    def close(self) -> None:
        """
        Close a sync body stream, if the stream has a `close` method.
        """
        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to close an asynchronous response using 'response.close()'. "
                "You should use 'await response.aclose()' instead."
            )
        if hasattr(self.stream, "close"):
            self.stream.close()

    # Async interface...

    async def aread(self) -> bytes:
        """
        Read the whole body from an async stream, store it, and return it.

        Later calls return the stored body without touching the stream.
        """
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to read an synchronous response using "
                "'await response.aread()'. "
                "You should use 'response.read()' instead."
            )
        if not hasattr(self, "_content"):
            parts = []
            async for part in self.aiter_stream():
                parts.append(part)
            self._content = b"".join(parts)
        return self._content

    async def aiter_stream(self) -> typing.AsyncIterator[bytes]:
        """
        Iterate over the chunks of an async body stream. May only be used once.
        """
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to stream an synchronous response using 'async for ... in "
                "response.aiter_stream()'. "
                "You should use 'for ... in response.iter_stream()' instead."
            )
        if self._stream_consumed:
            raise RuntimeError(
                "Attempted to call 'async for ... in response.aiter_stream()' "
                "more than once."
            )
        self._stream_consumed = True
        async for part in self.stream:
            yield part

    async def aclose(self) -> None:
        """
        Close an async body stream, if the stream has an `aclose` method.
        """
        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
            raise RuntimeError(
                "Attempted to close a synchronous response using "
                "'await response.aclose()'. "
                "You should use 'response.close()' instead."
            )
        if hasattr(self.stream, "aclose"):
            await self.stream.aclose()
494
495
class Proxy:
    """
    Configuration for a proxy: its URL, optional credentials, extra headers,
    and an optional SSL context.
    """

    def __init__(
        self,
        url: URL | bytes | str,
        auth: tuple[bytes | str, bytes | str] | None = None,
        headers: HeadersAsMapping | HeadersAsSequence | None = None,
        ssl_context: ssl.SSLContext | None = None,
    ):
        """
        Parameters:
            url: The proxy URL, as a `URL` instance, a string, or bytes.
            auth: Optional `(username, password)` pair. When given, a
                `Proxy-Authorization: Basic ...` header is placed in front
                of the other proxy headers.
            headers: Optional headers associated with the proxy.
            ssl_context: Optional SSL context, stored as given.
        """
        self.url = enforce_url(url, name="url")
        self.headers = enforce_headers(headers, name="headers")
        self.ssl_context = ssl_context

        if auth is None:
            self.auth: tuple[bytes, bytes] | None = None
            return

        username = enforce_bytes(auth[0], name="auth")
        password = enforce_bytes(auth[1], name="auth")
        # Basic credentials are base64("username:password").
        credentials = base64.b64encode(username + b":" + password)
        self.auth = (username, password)
        self.headers = [
            (b"Proxy-Authorization", b"Basic " + credentials),
            *self.headers,
        ]