Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/resumable_media/_download.py: 29%
136 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
1# Copyright 2017 Google Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Virtual base classes for downloading media from Google APIs."""
18import http.client
19import re
21from google.resumable_media import _helpers
22from google.resumable_media import common
# Matches a ``Content-Range`` header value of the form
# ``bytes {start}-{end}/{total}``; the match is case-insensitive and each
# numeric field is exposed as a named group.
_CONTENT_RANGE_RE = re.compile(
    r"bytes (?P<start_byte>\d+)-(?P<end_byte>\d+)/(?P<total_bytes>\d+)",
    re.IGNORECASE,
)
# Response status codes accepted as a successful (full or partial) download.
_ACCEPTABLE_STATUS_CODES = (
    http.client.OK,
    http.client.PARTIAL_CONTENT,
)
# HTTP verb used for every download request.
_GET = "GET"
# ``Content-Range`` value that, on a 416 response, marks a zero-byte resource.
_ZERO_CONTENT_RANGE_HEADER = "bytes */0"
class DownloadBase:
    """Common foundation for the download helpers.

    Stores the state every download flavor shares (target URL, output
    stream, requested byte range, extra headers) and declares the response
    accessors that concrete transports are expected to override.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded.
        end (int): The last byte in a range to be downloaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def __init__(self, media_url, stream=None, start=None, end=None, headers=None):
        self.media_url = media_url
        self._stream = stream
        self.start = start
        self.end = end
        # A caller-supplied mapping is stored as-is (not copied); a fresh
        # dict is created only when no headers were given.
        self._headers = {} if headers is None else headers
        self._finished = False
        self._retry_strategy = common.RetryStrategy()

    @property
    def finished(self):
        """bool: Whether this download has already completed."""
        return self._finished

    @staticmethod
    def _get_status_code(response):
        """Extract the status code from an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

    @staticmethod
    def _get_headers(response):
        """Extract the headers from an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

    @staticmethod
    def _get_body(response):
        """Extract the body from an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")
class Download(DownloadBase):
    """Helper to manage downloading a resource from a Google API.

    A "slice" of the resource can be requested by passing ``start`` and / or
    ``end``. In typical usage, however, neither ``start`` nor ``end`` is
    expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c" and None.
    """

    def __init__(
        self, media_url, stream=None, start=None, end=None, headers=None, checksum="md5"
    ):
        super().__init__(
            media_url, stream=stream, start=start, end=end, headers=headers
        )
        self.checksum = checksum
        # Bookkeeping that starts out empty and is filled in elsewhere.
        self._bytes_downloaded = 0
        self._expected_checksum = None
        self._checksum_object = None
        self._object_generation = None

    def _prepare_request(self):
        """Prepare the contents of an HTTP request.

        Covers everything that has to happen before the request is sent and
        that needs no network I/O (or other I/O), following the `sans-I/O`_
        philosophy.

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always GET)
              * the URL for the request
              * the body of the request (always :data:`None`)
              * headers for the request

        Raises:
            ValueError: If the current :class:`Download` has already
                finished.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError("A download can only be used once.")

        # The stored header mapping is updated in place and handed back.
        request_headers = self._headers
        add_bytes_range(self.start, self.end, request_headers)
        return _GET, self.media_url, None, request_headers

    def _process_response(self, response):
        """Process the response from an HTTP request.

        Covers everything that has to happen after the request completes and
        that needs no network I/O (or other I/O), following the `sans-I/O`_
        philosophy.

        Args:
            response (object): The HTTP response object.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # Mark this instance as used *before* validating, so it can never be
        # consumed a second time even when the status check raises.
        self._finished = True
        _helpers.require_status_code(
            response, _ACCEPTABLE_STATUS_CODES, self._get_status_code
        )

    def consume(self, transport, timeout=None):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")
class ChunkedDownload(DownloadBase):
    """Download a resource in chunks from a Google API.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each
            request.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            will be used to concatenate chunks of the resource as they are
            downloaded.
        start (int): The first byte in a range to be downloaded. If not
            provided, defaults to ``0``.
        end (int): The last byte in a range to be downloaded. If not
            provided, will download to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with each request, e.g. headers for data encryption
            key headers.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each request.

    Raises:
        ValueError: If ``start`` is negative.
    """

    def __init__(self, media_url, chunk_size, stream, start=0, end=None, headers=None):
        # Reject a negative offset before any state is initialized.
        if start < 0:
            raise ValueError(
                "On a chunked download the starting value cannot be negative."
            )
        super().__init__(
            media_url, stream=stream, start=start, end=end, headers=headers
        )
        self.chunk_size = chunk_size
        self._bytes_downloaded = 0
        self._total_bytes = None
        self._invalid = False

    @property
    def bytes_downloaded(self):
        """int: Count of bytes downloaded so far."""
        return self._bytes_downloaded

    @property
    def total_bytes(self):
        """Optional[int]: The total number of bytes to be downloaded."""
        return self._total_bytes

    @property
    def invalid(self):
        """bool: Whether the download is in an invalid state.

        This will occur if a call to :meth:`consume_next_chunk` fails.
        """
        return self._invalid

    def _get_byte_range(self):
        """Work out the byte range for the next request.

        Returns:
            Tuple[int, int]: The pair of begin and end byte for the next
            chunked request.
        """
        first_byte = self.start + self.bytes_downloaded
        # Candidate end positions: a full chunk, capped by ``end`` and by
        # ``total_bytes - 1`` whenever those are known.
        upper_bounds = [first_byte + self.chunk_size - 1]
        if self.end is not None:
            upper_bounds.append(self.end)
        if self.total_bytes is not None:
            upper_bounds.append(self.total_bytes - 1)
        return first_byte, min(upper_bounds)

    def _prepare_request(self):
        """Prepare the contents of an HTTP request.

        Covers everything that has to happen before the request is sent and
        that needs no network I/O (or other I/O), following the `sans-I/O`_
        philosophy.

        .. note::

            This method is called once per chunk, so ``headers`` is mutated
            between requests. No copy is made because the same keys are
            being updated each time.

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always GET)
              * the URL for the request
              * the body of the request (always :data:`None`)
              * headers for the request

        Raises:
            ValueError: If the current download has finished.
            ValueError: If the current download is invalid.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError("Download has finished.")
        if self.invalid:
            raise ValueError("Download is invalid and cannot be re-used.")

        first_byte, last_byte = self._get_byte_range()
        request_headers = self._headers
        add_bytes_range(first_byte, last_byte, request_headers)
        return _GET, self.media_url, None, request_headers

    def _make_invalid(self):
        """Flag this instance as invalid.

        Intended to be handed as a callback to helpers that raise an
        exception, so they can mark this instance invalid before raising.
        """
        self._invalid = True

    def _process_response(self, response):
        """Process the response from an HTTP request.

        Covers everything that has to happen after the request completes and
        that needs no network I/O, following the `sans-I/O`_ philosophy.

        For the time being, this **does require** some form of I/O to write
        a chunk to ``stream``. However, this will (almost) certainly not be
        network I/O.

        Updates the current state after consuming a chunk. The number of
        bytes added to ``bytes_downloaded`` comes from the
        ``content-length`` header, or from the span of the ``content-range``
        header when a ``transfer-encoding`` header is present.

        If ``total_bytes`` is already set, it is assumed (but not checked)
        to be correct and is not compared against the headers.

        The **total** length is expected in the ``content-range`` header,
        which is only present on responses to requests that sent the
        ``range`` header. It should be of the form
        ``bytes {start}-{end}/{total}`` and ``{end} - {start} + 1``
        should be the same as the ``Content-Length``.

        Args:
            response (object): The HTTP response object (need headers).

        Raises:
            ~google.resumable_media.common.InvalidResponse: If the number
                of bytes in the body doesn't match the content length header.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # Special case: a 416 whose content range reports zero total bytes
        # means there is nothing to download.
        if _check_for_zero_content_range(
            response, self._get_status_code, self._get_headers
        ):
            self._finished = True
            return

        # Validate the response before touching any state on the instance.
        _helpers.require_status_code(
            response,
            _ACCEPTABLE_STATUS_CODES,
            self._get_status_code,
            callback=self._make_invalid,
        )
        response_headers = self._get_headers(response)
        payload = self._get_body(response)

        start_byte, end_byte, total_bytes = get_range_info(
            response, self._get_headers, callback=self._make_invalid
        )

        if response_headers.get("transfer-encoding") is None:
            content_length = _helpers.header_required(
                response,
                "content-length",
                self._get_headers,
                callback=self._make_invalid,
            )
            num_bytes = int(content_length)
            received = len(payload)
            if received != num_bytes:
                self._make_invalid()
                raise common.InvalidResponse(
                    response,
                    "Response is different size than content-length",
                    "Expected",
                    num_bytes,
                    "Received",
                    received,
                )
        else:
            # 'content-length' header not allowed with chunked encoding, so
            # the size is derived from the content range instead.
            num_bytes = end_byte - start_byte + 1

        # Record progress first.
        self._bytes_downloaded += num_bytes
        # Done once the end byte reaches ``end`` or ``total_bytes - 1``.
        reached_requested_end = self.end is not None and end_byte >= self.end
        if reached_requested_end or end_byte >= total_bytes - 1:
            self._finished = True
        # NOTE: ``total_bytes`` from the response is only used when the
        #       total is not already known.
        if self.total_bytes is None:
            self._total_bytes = total_bytes
        # Finally, append the chunk to the output stream.
        self._stream.write(payload)

    def consume_next_chunk(self, transport, timeout=None):
        """Consume the next chunk of the resource to be downloaded.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")
def add_bytes_range(start, end, headers):
    """Write a ``bytes=`` range into a header dictionary.

    Some possible inputs and the corresponding bytes ranges::

       >>> headers = {}
       >>> add_bytes_range(None, None, headers)
       >>> headers
       {}
       >>> add_bytes_range(500, 999, headers)
       >>> headers['range']
       'bytes=500-999'
       >>> add_bytes_range(None, 499, headers)
       >>> headers['range']
       'bytes=0-499'
       >>> add_bytes_range(-500, None, headers)
       >>> headers['range']
       'bytes=-500'
       >>> add_bytes_range(9500, None, headers)
       >>> headers['range']
       'bytes=9500-'

    Args:
        start (Optional[int]): The first byte in a range. Can be zero,
            positive, negative or :data:`None`.
        end (Optional[int]): The last byte in a range. Assumed to be
            positive.
        headers (Mapping[str, str]): A headers mapping which can have the
            bytes range added if at least one of ``start`` or ``end``
            is not :data:`None`.
    """
    if start is None and end is None:
        # Nothing requested: leave ``headers`` untouched.
        return

    if start is None:
        # Only an end was given, so the range starts at byte zero.
        # NOTE: This assumes ``end`` is non-negative.
        range_spec = "0-{:d}".format(end)
    elif end is not None:
        # NOTE: This is invalid if ``start < 0``.
        range_spec = "{:d}-{:d}".format(start, end)
    elif start < 0:
        # A negative start with no end is emitted as a suffix range.
        range_spec = "{:d}".format(start)
    else:
        # Open-ended range from ``start`` onwards.
        range_spec = "{:d}-".format(start)

    headers[_helpers.RANGE_HEADER] = "bytes=" + range_spec
def get_range_info(response, get_headers, callback=_helpers.do_nothing):
    """Parse the start, end and total bytes out of a content range header.

    Args:
        response (object): An HTTP response object.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
            from an HTTP response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        Tuple[int, int, int]: The start byte, end byte and total bytes.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the
            ``Content-Range`` header is not of the form
            ``bytes {start}-{end}/{total}``.
    """
    content_range = _helpers.header_required(
        response, _helpers.CONTENT_RANGE_HEADER, get_headers, callback=callback
    )
    parsed = _CONTENT_RANGE_RE.match(content_range)
    if parsed is not None:
        start_byte = int(parsed.group("start_byte"))
        end_byte = int(parsed.group("end_byte"))
        total_bytes = int(parsed.group("total_bytes"))
        return start_byte, end_byte, total_bytes

    # Give the caller a chance to react (e.g. invalidate itself) before the
    # malformed header is reported.
    callback()
    raise common.InvalidResponse(
        response,
        "Unexpected content-range header",
        content_range,
        'Expected to be of the form "bytes {start}-{end}/{total}"',
    )
def _check_for_zero_content_range(response, get_status_code, get_headers):
    """Report whether a response is a 416 with a zero-byte content range.

    This is the special case for handling zero bytes files.

    Args:
        response (object): An HTTP response object.
        get_status_code (Callable[Any, int]): Helper to get a status code
            from a response.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
            from an HTTP response.

    Returns:
        bool: True if content range total bytes is zero, false otherwise.
    """
    # Only a "range not satisfiable" response can signal an empty resource;
    # for any other status the headers are not inspected at all.
    if get_status_code(response) != http.client.REQUESTED_RANGE_NOT_SATISFIABLE:
        return False

    content_range = _helpers.header_required(
        response,
        _helpers.CONTENT_RANGE_HEADER,
        get_headers,
        callback=_helpers.do_nothing,
    )
    return content_range == _ZERO_CONTENT_RANGE_HEADER