1# Copyright 2021, New York University and the TUF contributors
2# SPDX-License-Identifier: MIT OR Apache-2.0
3
4"""Provides an implementation of ``FetcherInterface`` using the urllib3 HTTP
5library.
6"""
7
8from __future__ import annotations
9
10import logging
11from typing import TYPE_CHECKING
12
13# Imports
14import urllib3
15
16import tuf
17from tuf.api import exceptions
18from tuf.ngclient._internal.proxy import ProxyEnvironment
19from tuf.ngclient.fetcher import FetcherInterface
20
21if TYPE_CHECKING:
22 from collections.abc import Iterator
23
24# Globals
25logger = logging.getLogger(__name__)
26
27
28# Classes
class Urllib3Fetcher(FetcherInterface):
    """An implementation of ``FetcherInterface`` based on the urllib3 library.

    Attributes:
        socket_timeout: Timeout in seconds, used for both initial connection
            delay and the maximum delay between bytes received.
        chunk_size: Chunk size in bytes used when downloading.
    """

    def __init__(
        self,
        socket_timeout: int = 30,
        chunk_size: int = 400000,
        app_user_agent: str | None = None,
    ) -> None:
        """Initialize the fetcher.

        Args:
            socket_timeout: Timeout in seconds for both the initial
                connection and the maximum gap between received bytes.
            chunk_size: Download chunk size in bytes.
            app_user_agent: Optional application identifier that is
                prepended to the default python-tuf User-Agent string.
        """
        # Default settings
        self.socket_timeout: int = socket_timeout  # seconds
        self.chunk_size: int = chunk_size  # bytes

        # Create User-Agent: "<app ua> python-tuf/<version>" when the
        # application supplied its own identifier, else just ours.
        ua = f"python-tuf/{tuf.__version__}"
        if app_user_agent is not None:
            ua = f"{app_user_agent} {ua}"

        self._proxy_env = ProxyEnvironment(headers={"User-Agent": ua})

    def _fetch(self, url: str) -> Iterator[bytes]:
        """Fetch the contents of HTTP/HTTPS url from a remote server.

        Args:
            url: URL string that represents a file location.

        Raises:
            exceptions.SlowRetrievalError: Timeout occurs while receiving
                data.
            exceptions.DownloadHTTPError: HTTP error code is received.

        Returns:
            Bytes iterator
        """

        # Defer downloading the response body with preload_content=False.
        # Always set the timeout. This timeout value is interpreted by
        # urllib3 as:
        # - connect timeout (max delay before first byte is received)
        # - read (gap) timeout (max delay between bytes received)
        try:
            response = self._proxy_env.request(
                "GET",
                url,
                preload_content=False,
                timeout=urllib3.Timeout(self.socket_timeout),
            )
        except urllib3.exceptions.MaxRetryError as e:
            if isinstance(e.reason, urllib3.exceptions.TimeoutError):
                raise exceptions.SlowRetrievalError from e
            # BUGFIX: re-raise non-timeout retry errors. Previously control
            # fell through with `response` unbound, raising a misleading
            # NameError instead of the real connection failure.
            raise

        if response.status >= 400:
            # Close eagerly: the body was never read (preload_content=False)
            # and the connection should go back to the pool.
            response.close()
            raise exceptions.DownloadHTTPError(
                f"HTTP error occurred with status {response.status}",
                response.status,
            )

        return self._chunks(response)

    def _chunks(
        self, response: urllib3.response.BaseHTTPResponse
    ) -> Iterator[bytes]:
        """A generator function to be returned by fetch.

        This way the caller of fetch can differentiate between connection
        and actual data download.
        """

        try:
            yield from response.stream(self.chunk_size)
        except urllib3.exceptions.MaxRetryError as e:
            if isinstance(e.reason, urllib3.exceptions.TimeoutError):
                raise exceptions.SlowRetrievalError from e
            # BUGFIX: propagate non-timeout retry errors instead of
            # silently swallowing them and truncating the download.
            raise
        finally:
            # Always return the connection to the pool, whether streaming
            # succeeded or failed.
            response.release_conn()