Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/resumable_media/requests/__init__.py: 9%
11 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
1# Copyright 2017 Google Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""``requests`` utilities for Google Media Downloads and Resumable Uploads.
17This sub-package assumes callers will use the `requests`_ library
18as transport and `google-auth`_ for sending authenticated HTTP traffic
19with ``requests``.
21.. _requests: http://docs.python-requests.org/
22.. _google-auth: https://google-auth.readthedocs.io/
24====================
25Authorized Transport
26====================
28To use ``google-auth`` and ``requests`` to create an authorized transport
29that has read-only access to Google Cloud Storage (GCS):
31.. testsetup:: get-credentials
33 import google.auth
34 import google.auth.credentials as creds_mod
35 import mock
37 def mock_default(scopes=None):
38 credentials = mock.Mock(spec=creds_mod.Credentials)
39 return credentials, 'mock-project'
41 # Patch the ``default`` function on the module.
42 original_default = google.auth.default
43 google.auth.default = mock_default
45.. doctest:: get-credentials
47 >>> import google.auth
48 >>> import google.auth.transport.requests as tr_requests
49 >>>
50 >>> ro_scope = 'https://www.googleapis.com/auth/devstorage.read_only'
51 >>> credentials, _ = google.auth.default(scopes=(ro_scope,))
52 >>> transport = tr_requests.AuthorizedSession(credentials)
53 >>> transport
54 <google.auth.transport.requests.AuthorizedSession object at 0x...>
56.. testcleanup:: get-credentials
58 # Put back the correct ``default`` function on the module.
59 google.auth.default = original_default
61================
62Simple Downloads
63================
65To download an object from Google Cloud Storage, construct the media URL
66for the GCS object and download it with an authorized transport that has
67access to the resource:
69.. testsetup:: basic-download
71 import mock
72 import requests
73 import http.client
75 bucket = 'bucket-foo'
76 blob_name = 'file.txt'
78 fake_response = requests.Response()
79 fake_response.status_code = int(http.client.OK)
80 fake_response.headers['Content-Length'] = '1364156'
81 fake_content = mock.MagicMock(spec=['__len__'])
82 fake_content.__len__.return_value = 1364156
83 fake_response._content = fake_content
85 get_method = mock.Mock(return_value=fake_response, spec=[])
86 transport = mock.Mock(request=get_method, spec=['request'])
88.. doctest:: basic-download
90 >>> from google.resumable_media.requests import Download
91 >>>
92 >>> url_template = (
93 ... 'https://www.googleapis.com/download/storage/v1/b/'
94 ... '{bucket}/o/{blob_name}?alt=media')
95 >>> media_url = url_template.format(
96 ... bucket=bucket, blob_name=blob_name)
97 >>>
98 >>> download = Download(media_url)
99 >>> response = download.consume(transport)
100 >>> download.finished
101 True
102 >>> response
103 <Response [200]>
104 >>> response.headers['Content-Length']
105 '1364156'
106 >>> len(response.content)
107 1364156
109To download only a portion of the bytes in the object,
110specify ``start`` and ``end`` byte positions (both optional):
112.. testsetup:: basic-download-with-slice
114 import mock
115 import requests
116 import http.client
118 from google.resumable_media.requests import Download
120 media_url = 'http://test.invalid'
121 start = 4096
122 end = 8191
123 slice_size = end - start + 1
125 fake_response = requests.Response()
126 fake_response.status_code = int(http.client.PARTIAL_CONTENT)
127 fake_response.headers['Content-Length'] = '{:d}'.format(slice_size)
128 content_range = 'bytes {:d}-{:d}/1364156'.format(start, end)
129 fake_response.headers['Content-Range'] = content_range
130 fake_content = mock.MagicMock(spec=['__len__'])
131 fake_content.__len__.return_value = slice_size
132 fake_response._content = fake_content
134 get_method = mock.Mock(return_value=fake_response, spec=[])
135 transport = mock.Mock(request=get_method, spec=['request'])
137.. doctest:: basic-download-with-slice
139 >>> download = Download(media_url, start=4096, end=8191)
140 >>> response = download.consume(transport)
141 >>> download.finished
142 True
143 >>> response
144 <Response [206]>
145 >>> response.headers['Content-Length']
146 '4096'
147 >>> response.headers['Content-Range']
148 'bytes 4096-8191/1364156'
149 >>> len(response.content)
150 4096
152=================
153Chunked Downloads
154=================
156For very large objects or objects of unknown size, it may make more sense
157to download the object in chunks rather than all at once. This can be done
158to avoid dropped connections with a poor internet connection or can allow
159multiple chunks to be downloaded in parallel to speed up the total
160download.
162A :class:`.ChunkedDownload` uses the same media URL and authorized
163transport that a basic :class:`.Download` would use, but also
164requires a chunk size and a write-able byte ``stream``. The chunk size is used
165to determine how much of the resource to consume with each request and the
166stream is to allow the resource to be written out (e.g. to disk) without
167having to fit in memory all at once.
169.. testsetup:: chunked-download
171 import io
173 import mock
174 import requests
175 import http.client
177 media_url = 'http://test.invalid'
179 fifty_mb = 50 * 1024 * 1024
180 one_gb = 1024 * 1024 * 1024
181 fake_response = requests.Response()
182 fake_response.status_code = int(http.client.PARTIAL_CONTENT)
183 fake_response.headers['Content-Length'] = '{:d}'.format(fifty_mb)
184 content_range = 'bytes 0-{:d}/{:d}'.format(fifty_mb - 1, one_gb)
185 fake_response.headers['Content-Range'] = content_range
186 fake_content_begin = b'The beginning of the chunk...'
187 fake_content = fake_content_begin + b'1' * (fifty_mb - 29)
188 fake_response._content = fake_content
190 get_method = mock.Mock(return_value=fake_response, spec=[])
191 transport = mock.Mock(request=get_method, spec=['request'])
193.. doctest:: chunked-download
195 >>> from google.resumable_media.requests import ChunkedDownload
196 >>>
197 >>> chunk_size = 50 * 1024 * 1024 # 50MB
198 >>> stream = io.BytesIO()
199 >>> download = ChunkedDownload(
200 ... media_url, chunk_size, stream)
201 >>> # Check the state of the download before starting.
202 >>> download.bytes_downloaded
203 0
204 >>> download.total_bytes is None
205 True
206 >>> response = download.consume_next_chunk(transport)
207 >>> # Check the state of the download after consuming one chunk.
208 >>> download.finished
209 False
210 >>> download.bytes_downloaded # chunk_size
211 52428800
212 >>> download.total_bytes # 1GB
213 1073741824
214 >>> response
215 <Response [206]>
216 >>> response.headers['Content-Length']
217 '52428800'
218 >>> response.headers['Content-Range']
219 'bytes 0-52428799/1073741824'
220 >>> len(response.content) == chunk_size
221 True
222 >>> stream.seek(0)
223 0
224 >>> stream.read(29)
225 b'The beginning of the chunk...'
227The download will change its ``finished`` status to :data:`True`
228once the final chunk is consumed. In some cases, the final chunk may
229not be the same size as the other chunks:
231.. testsetup:: chunked-download-end
233 import mock
234 import requests
235 import http.client
237 from google.resumable_media.requests import ChunkedDownload
239 media_url = 'http://test.invalid'
241 fifty_mb = 50 * 1024 * 1024
242 one_gb = 1024 * 1024 * 1024
243 stream = mock.Mock(spec=['write'])
244 download = ChunkedDownload(media_url, fifty_mb, stream)
245 download._bytes_downloaded = 20 * fifty_mb
246 download._total_bytes = one_gb
248 fake_response = requests.Response()
249 fake_response.status_code = int(http.client.PARTIAL_CONTENT)
250 slice_size = one_gb - 20 * fifty_mb
251 fake_response.headers['Content-Length'] = '{:d}'.format(slice_size)
252 content_range = 'bytes {:d}-{:d}/{:d}'.format(
253 20 * fifty_mb, one_gb - 1, one_gb)
254 fake_response.headers['Content-Range'] = content_range
255 fake_content = mock.MagicMock(spec=['__len__'])
256 fake_content.__len__.return_value = slice_size
257 fake_response._content = fake_content
259 get_method = mock.Mock(return_value=fake_response, spec=[])
260 transport = mock.Mock(request=get_method, spec=['request'])
262.. doctest:: chunked-download-end
264 >>> # The state of the download in progress.
265 >>> download.finished
266 False
267 >>> download.bytes_downloaded # 20 chunks at 50MB
268 1048576000
269 >>> download.total_bytes # 1GB
270 1073741824
271 >>> response = download.consume_next_chunk(transport)
272 >>> # The state of the download after consuming the final chunk.
273 >>> download.finished
274 True
275 >>> download.bytes_downloaded == download.total_bytes
276 True
277 >>> response
278 <Response [206]>
279 >>> response.headers['Content-Length']
280 '25165824'
281 >>> response.headers['Content-Range']
282 'bytes 1048576000-1073741823/1073741824'
283 >>> len(response.content) < download.chunk_size
284 True
286In addition, a :class:`.ChunkedDownload` can also take optional
287``start`` and ``end`` byte positions.
289Usually, no checksum is returned with a chunked download. Even if one is returned,
290it is not validated. If you need to validate the checksum, you can do so
291by buffering the chunks and validating the checksum against the completed download.
293==============
294Simple Uploads
295==============
297Among the three supported upload classes, the simplest is
298:class:`.SimpleUpload`. A simple upload should be used when the resource
299being uploaded is small and when there is no metadata (other than the name)
300associated with the resource.
302.. testsetup:: simple-upload
304 import json
306 import mock
307 import requests
308 import http.client
310 bucket = 'some-bucket'
311 blob_name = 'file.txt'
313 fake_response = requests.Response()
314 fake_response.status_code = int(http.client.OK)
315 payload = {
316 'bucket': bucket,
317 'contentType': 'text/plain',
318 'md5Hash': 'M0XLEsX9/sMdiI+4pB4CAQ==',
319 'name': blob_name,
320 'size': '27',
321 }
322 fake_response._content = json.dumps(payload).encode('utf-8')
324 post_method = mock.Mock(return_value=fake_response, spec=[])
325 transport = mock.Mock(request=post_method, spec=['request'])
327.. doctest:: simple-upload
328 :options: +NORMALIZE_WHITESPACE
330 >>> from google.resumable_media.requests import SimpleUpload
331 >>>
332 >>> url_template = (
333 ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
334 ... 'uploadType=media&'
335 ... 'name={blob_name}')
336 >>> upload_url = url_template.format(
337 ... bucket=bucket, blob_name=blob_name)
338 >>>
339 >>> upload = SimpleUpload(upload_url)
340 >>> data = b'Some not too large content.'
341 >>> content_type = 'text/plain'
342 >>> response = upload.transmit(transport, data, content_type)
343 >>> upload.finished
344 True
345 >>> response
346 <Response [200]>
347 >>> json_response = response.json()
348 >>> json_response['bucket'] == bucket
349 True
350 >>> json_response['name'] == blob_name
351 True
352 >>> json_response['contentType'] == content_type
353 True
354 >>> json_response['md5Hash']
355 'M0XLEsX9/sMdiI+4pB4CAQ=='
356 >>> int(json_response['size']) == len(data)
357 True
359In the rare case that an upload fails, an :exc:`.InvalidResponse`
360will be raised:
362.. testsetup:: simple-upload-fail
364 import time
366 import mock
367 import requests
368 import http.client
370 from google import resumable_media
371 from google.resumable_media import _helpers
372 from google.resumable_media.requests import SimpleUpload as constructor
374 upload_url = 'http://test.invalid'
375 data = b'Some not too large content.'
376 content_type = 'text/plain'
378 fake_response = requests.Response()
379 fake_response.status_code = int(http.client.SERVICE_UNAVAILABLE)
381 post_method = mock.Mock(return_value=fake_response, spec=[])
382 transport = mock.Mock(request=post_method, spec=['request'])
384 time_sleep = time.sleep
385 def dont_sleep(seconds):
386 raise RuntimeError('No sleep', seconds)
388 def SimpleUpload(*args, **kwargs):
389 upload = constructor(*args, **kwargs)
390 # Mock the cumulative sleep to avoid retries (and `time.sleep()`).
391 upload._retry_strategy = resumable_media.RetryStrategy(
392 max_cumulative_retry=-1.0)
393 return upload
395 time.sleep = dont_sleep
397.. doctest:: simple-upload-fail
398 :options: +NORMALIZE_WHITESPACE
400 >>> upload = SimpleUpload(upload_url)
401 >>> error = None
402 >>> try:
403 ... upload.transmit(transport, data, content_type)
404 ... except resumable_media.InvalidResponse as caught_exc:
405 ... error = caught_exc
406 ...
407 >>> error
408 InvalidResponse('Request failed with status code', 503,
409 'Expected one of', <HTTPStatus.OK: 200>)
410 >>> error.response
411 <Response [503]>
412 >>>
413 >>> upload.finished
414 True
416.. testcleanup:: simple-upload-fail
418 # Put back the correct ``sleep`` function on the ``time`` module.
419 time.sleep = time_sleep
421Even in the case of failure, we see that the upload is
422:attr:`~.SimpleUpload.finished`, i.e. it cannot be re-used.
424=================
425Multipart Uploads
426=================
428After the simple upload, the :class:`.MultipartUpload` can be used to
429achieve essentially the same task. However, a multipart upload allows some
430metadata about the resource to be sent along as well. (This is the "multi":
431we send a first part with the metadata and a second part with the actual
432bytes in the resource.)
434Usage is similar to the simple upload, but :meth:`~.MultipartUpload.transmit`
435accepts an extra required argument: ``metadata``.
437.. testsetup:: multipart-upload
439 import json
441 import mock
442 import requests
443 import http.client
445 bucket = 'some-bucket'
446 blob_name = 'file.txt'
447 data = b'Some not too large content.'
448 content_type = 'text/plain'
450 fake_response = requests.Response()
451 fake_response.status_code = int(http.client.OK)
452 payload = {
453 'bucket': bucket,
454 'name': blob_name,
455 'metadata': {'color': 'grurple'},
456 }
457 fake_response._content = json.dumps(payload).encode('utf-8')
459 post_method = mock.Mock(return_value=fake_response, spec=[])
460 transport = mock.Mock(request=post_method, spec=['request'])
462.. doctest:: multipart-upload
464 >>> from google.resumable_media.requests import MultipartUpload
465 >>>
466 >>> url_template = (
467 ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
468 ... 'uploadType=multipart')
469 >>> upload_url = url_template.format(bucket=bucket)
470 >>>
471 >>> upload = MultipartUpload(upload_url)
472 >>> metadata = {
473 ... 'name': blob_name,
474 ... 'metadata': {
475 ... 'color': 'grurple',
476 ... },
477 ... }
478 >>> response = upload.transmit(transport, data, metadata, content_type)
479 >>> upload.finished
480 True
481 >>> response
482 <Response [200]>
483 >>> json_response = response.json()
484 >>> json_response['bucket'] == bucket
485 True
486 >>> json_response['name'] == blob_name
487 True
488 >>> json_response['metadata'] == metadata['metadata']
489 True
491As with the simple upload, in the case of failure an :exc:`.InvalidResponse`
492is raised, enclosing the :attr:`~.InvalidResponse.response` that caused
493the failure and the ``upload`` object cannot be re-used after a failure.
495=================
496Resumable Uploads
497=================
499A :class:`.ResumableUpload` deviates from the other two upload classes:
500it transmits a resource over the course of multiple requests. This
501is intended to be used in cases where:
503* the size of the resource is not known (i.e. it is generated on the fly)
504* requests must be short-lived
505* the client has request **size** limitations
506* the resource is too large to fit into memory
508In general, a resource should be sent in a **single** request to avoid
509latency and reduce QPS. See `GCS best practices`_ for more things to
510consider when using a resumable upload.
512.. _GCS best practices: https://cloud.google.com/storage/docs/\
513 best-practices#uploading
515After creating a :class:`.ResumableUpload` instance, a
516**resumable upload session** must be initiated to let the server know that
517a series of chunked upload requests will be coming and to obtain an
518``upload_id`` for the session. In contrast to the other two upload classes,
519:meth:`~.ResumableUpload.initiate` takes a byte ``stream`` as input rather
520than raw bytes as ``data``. This can be a file object, a :class:`~io.BytesIO`
521object or any other stream implementing the same interface.
523.. testsetup:: resumable-initiate
525 import io
527 import mock
528 import requests
529 import http.client
531 bucket = 'some-bucket'
532 blob_name = 'file.txt'
533 data = b'Some resumable bytes.'
534 content_type = 'text/plain'
536 fake_response = requests.Response()
537 fake_response.status_code = int(http.client.OK)
538 fake_response._content = b''
539 upload_id = 'ABCdef189XY_super_serious'
540 resumable_url_template = (
541 'https://www.googleapis.com/upload/storage/v1/b/{bucket}'
542 '/o?uploadType=resumable&upload_id={upload_id}')
543 resumable_url = resumable_url_template.format(
544 bucket=bucket, upload_id=upload_id)
545 fake_response.headers['location'] = resumable_url
546 fake_response.headers['x-guploader-uploadid'] = upload_id
548 post_method = mock.Mock(return_value=fake_response, spec=[])
549 transport = mock.Mock(request=post_method, spec=['request'])
551.. doctest:: resumable-initiate
553 >>> from google.resumable_media.requests import ResumableUpload
554 >>>
555 >>> url_template = (
556 ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
557 ... 'uploadType=resumable')
558 >>> upload_url = url_template.format(bucket=bucket)
559 >>>
560 >>> chunk_size = 1024 * 1024 # 1MB
561 >>> upload = ResumableUpload(upload_url, chunk_size)
562 >>> stream = io.BytesIO(data)
563 >>> # The upload doesn't know how "big" it is until seeing a stream.
564 >>> upload.total_bytes is None
565 True
566 >>> metadata = {'name': blob_name}
567 >>> response = upload.initiate(transport, stream, metadata, content_type)
568 >>> response
569 <Response [200]>
570 >>> upload.resumable_url == response.headers['Location']
571 True
572 >>> upload.total_bytes == len(data)
573 True
574 >>> upload_id = response.headers['X-GUploader-UploadID']
575 >>> upload_id
576 'ABCdef189XY_super_serious'
577 >>> upload.resumable_url == upload_url + '&upload_id=' + upload_id
578 True
580Once a :class:`.ResumableUpload` has been initiated, the resource is
581transmitted in chunks until completion:
583.. testsetup:: resumable-transmit
585 import io
586 import json
588 import mock
589 import requests
590 import http.client
592 from google import resumable_media
593 import google.resumable_media.requests.upload as upload_mod
595 data = b'01234567891'
596 stream = io.BytesIO(data)
597 # Create an "already initiated" upload.
598 upload_url = 'http://test.invalid'
599 chunk_size = 256 * 1024 # 256KB
600 upload = upload_mod.ResumableUpload(upload_url, chunk_size)
601 upload._resumable_url = 'http://test.invalid?upload_id=mocked'
602 upload._stream = stream
603 upload._content_type = 'text/plain'
604 upload._total_bytes = len(data)
606 # After-the-fact update the chunk size so that len(data)
607 # is split into three.
608 upload._chunk_size = 4
609 # Make three fake responses.
610 fake_response0 = requests.Response()
611 fake_response0.status_code = http.client.PERMANENT_REDIRECT
612 fake_response0.headers['range'] = 'bytes=0-3'
614 fake_response1 = requests.Response()
615 fake_response1.status_code = http.client.PERMANENT_REDIRECT
616 fake_response1.headers['range'] = 'bytes=0-7'
618 fake_response2 = requests.Response()
619 fake_response2.status_code = int(http.client.OK)
620 bucket = 'some-bucket'
621 blob_name = 'file.txt'
622 payload = {
623 'bucket': bucket,
624 'name': blob_name,
625 'size': '{:d}'.format(len(data)),
626 }
627 fake_response2._content = json.dumps(payload).encode('utf-8')
629 # Use the fake responses to mock a transport.
630 responses = [fake_response0, fake_response1, fake_response2]
631 put_method = mock.Mock(side_effect=responses, spec=[])
632 transport = mock.Mock(request=put_method, spec=['request'])
634.. doctest:: resumable-transmit
636 >>> response0 = upload.transmit_next_chunk(transport)
637 >>> response0
638 <Response [308]>
639 >>> upload.finished
640 False
641 >>> upload.bytes_uploaded == upload.chunk_size
642 True
643 >>>
644 >>> response1 = upload.transmit_next_chunk(transport)
645 >>> response1
646 <Response [308]>
647 >>> upload.finished
648 False
649 >>> upload.bytes_uploaded == 2 * upload.chunk_size
650 True
651 >>>
652 >>> response2 = upload.transmit_next_chunk(transport)
653 >>> response2
654 <Response [200]>
655 >>> upload.finished
656 True
657 >>> upload.bytes_uploaded == upload.total_bytes
658 True
659 >>> json_response = response2.json()
660 >>> json_response['bucket'] == bucket
661 True
662 >>> json_response['name'] == blob_name
663 True
664"""
665from google.resumable_media.requests.download import ChunkedDownload
666from google.resumable_media.requests.download import Download
667from google.resumable_media.requests.upload import MultipartUpload
668from google.resumable_media.requests.download import RawChunkedDownload
669from google.resumable_media.requests.download import RawDownload
670from google.resumable_media.requests.upload import ResumableUpload
671from google.resumable_media.requests.upload import SimpleUpload
672from google.resumable_media.requests.upload import XMLMPUContainer
673from google.resumable_media.requests.upload import XMLMPUPart
# Public API of this sub-package: download and upload classes re-exported
# from the ``download`` and ``upload`` submodules above.
__all__ = [
    "ChunkedDownload",
    "Download",
    "MultipartUpload",
    "RawChunkedDownload",
    "RawDownload",
    "ResumableUpload",
    "SimpleUpload",
    "XMLMPUContainer",
    "XMLMPUPart",
]