1# Copyright 2017 Google Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""``requests`` utilities for Google Media Downloads and Resumable Uploads.
16
17This sub-package assumes callers will use the `requests`_ library
18as transport and `google-auth`_ for sending authenticated HTTP traffic
19with ``requests``.
20
21.. _requests: http://docs.python-requests.org/
22.. _google-auth: https://google-auth.readthedocs.io/
23
24====================
25Authorized Transport
26====================
27
28To use ``google-auth`` and ``requests`` to create an authorized transport
29that has read-only access to Google Cloud Storage (GCS):
30
31.. testsetup:: get-credentials
32
33 import google.auth
34 import google.auth.credentials as creds_mod
35 import mock
36
37 def mock_default(scopes=None):
38 credentials = mock.Mock(spec=creds_mod.Credentials)
39 return credentials, 'mock-project'
40
41 # Patch the ``default`` function on the module.
42 original_default = google.auth.default
43 google.auth.default = mock_default
44
45.. doctest:: get-credentials
46
47 >>> import google.auth
48 >>> import google.auth.transport.requests as tr_requests
49 >>>
50 >>> ro_scope = 'https://www.googleapis.com/auth/devstorage.read_only'
51 >>> credentials, _ = google.auth.default(scopes=(ro_scope,))
52 >>> transport = tr_requests.AuthorizedSession(credentials)
53 >>> transport
54 <google.auth.transport.requests.AuthorizedSession object at 0x...>
55
56.. testcleanup:: get-credentials
57
58 # Put back the correct ``default`` function on the module.
59 google.auth.default = original_default
60
61================
62Simple Downloads
63================
64
65To download an object from Google Cloud Storage, construct the media URL
66for the GCS object and download it with an authorized transport that has
67access to the resource:
68
69.. testsetup:: basic-download
70
71 import mock
72 import requests
73 import http.client
74
75 bucket = 'bucket-foo'
76 blob_name = 'file.txt'
77
78 fake_response = requests.Response()
79 fake_response.status_code = int(http.client.OK)
80 fake_response.headers['Content-Length'] = '1364156'
81 fake_content = mock.MagicMock(spec=['__len__'])
82 fake_content.__len__.return_value = 1364156
83 fake_response._content = fake_content
84
85 get_method = mock.Mock(return_value=fake_response, spec=[])
86 transport = mock.Mock(request=get_method, spec=['request'])
87
88.. doctest:: basic-download
89
90 >>> from google.resumable_media.requests import Download
91 >>>
92 >>> url_template = (
93 ... 'https://www.googleapis.com/download/storage/v1/b/'
94 ... '{bucket}/o/{blob_name}?alt=media')
95 >>> media_url = url_template.format(
96 ... bucket=bucket, blob_name=blob_name)
97 >>>
98 >>> download = Download(media_url)
99 >>> response = download.consume(transport)
100 >>> download.finished
101 True
102 >>> response
103 <Response [200]>
104 >>> response.headers['Content-Length']
105 '1364156'
106 >>> len(response.content)
107 1364156
108
109To download only a portion of the bytes in the object,
110specify ``start`` and ``end`` byte positions (both optional):
111
112.. testsetup:: basic-download-with-slice
113
114 import mock
115 import requests
116 import http.client
117
118 from google.resumable_media.requests import Download
119
120 media_url = 'http://test.invalid'
121 start = 4096
122 end = 8191
123 slice_size = end - start + 1
124
125 fake_response = requests.Response()
126 fake_response.status_code = int(http.client.PARTIAL_CONTENT)
127 fake_response.headers['Content-Length'] = '{:d}'.format(slice_size)
128 content_range = 'bytes {:d}-{:d}/1364156'.format(start, end)
129 fake_response.headers['Content-Range'] = content_range
130 fake_content = mock.MagicMock(spec=['__len__'])
131 fake_content.__len__.return_value = slice_size
132 fake_response._content = fake_content
133
134 get_method = mock.Mock(return_value=fake_response, spec=[])
135 transport = mock.Mock(request=get_method, spec=['request'])
136
137.. doctest:: basic-download-with-slice
138
139 >>> download = Download(media_url, start=4096, end=8191)
140 >>> response = download.consume(transport)
141 >>> download.finished
142 True
143 >>> response
144 <Response [206]>
145 >>> response.headers['Content-Length']
146 '4096'
147 >>> response.headers['Content-Range']
148 'bytes 4096-8191/1364156'
149 >>> len(response.content)
150 4096
151
152=================
153Chunked Downloads
154=================
155
156For very large objects or objects of unknown size, it may make more sense
157to download the object in chunks rather than all at once. This can be done
158to avoid dropped connections with a poor internet connection or can allow
159multiple chunks to be downloaded in parallel to speed up the total
160download.
161
162A :class:`.ChunkedDownload` uses the same media URL and authorized
163transport that a basic :class:`.Download` would use, but also
164requires a chunk size and a write-able byte ``stream``. The chunk size is used
to determine how much of the resource to consume with each request and the
166stream is to allow the resource to be written out (e.g. to disk) without
167having to fit in memory all at once.
168
169.. testsetup:: chunked-download
170
171 import io
172
173 import mock
174 import requests
175 import http.client
176
177 media_url = 'http://test.invalid'
178
179 fifty_mb = 50 * 1024 * 1024
180 one_gb = 1024 * 1024 * 1024
181 fake_response = requests.Response()
182 fake_response.status_code = int(http.client.PARTIAL_CONTENT)
183 fake_response.headers['Content-Length'] = '{:d}'.format(fifty_mb)
184 content_range = 'bytes 0-{:d}/{:d}'.format(fifty_mb - 1, one_gb)
185 fake_response.headers['Content-Range'] = content_range
186 fake_content_begin = b'The beginning of the chunk...'
187 fake_content = fake_content_begin + b'1' * (fifty_mb - 29)
188 fake_response._content = fake_content
189
190 get_method = mock.Mock(return_value=fake_response, spec=[])
191 transport = mock.Mock(request=get_method, spec=['request'])
192
193.. doctest:: chunked-download
194
195 >>> from google.resumable_media.requests import ChunkedDownload
196 >>>
197 >>> chunk_size = 50 * 1024 * 1024 # 50MB
198 >>> stream = io.BytesIO()
199 >>> download = ChunkedDownload(
200 ... media_url, chunk_size, stream)
201 >>> # Check the state of the download before starting.
202 >>> download.bytes_downloaded
203 0
204 >>> download.total_bytes is None
205 True
206 >>> response = download.consume_next_chunk(transport)
207 >>> # Check the state of the download after consuming one chunk.
208 >>> download.finished
209 False
210 >>> download.bytes_downloaded # chunk_size
211 52428800
212 >>> download.total_bytes # 1GB
213 1073741824
214 >>> response
215 <Response [206]>
216 >>> response.headers['Content-Length']
217 '52428800'
218 >>> response.headers['Content-Range']
219 'bytes 0-52428799/1073741824'
220 >>> len(response.content) == chunk_size
221 True
222 >>> stream.seek(0)
223 0
224 >>> stream.read(29)
225 b'The beginning of the chunk...'
226
The download will change its ``finished`` status to :data:`True`
228once the final chunk is consumed. In some cases, the final chunk may
229not be the same size as the other chunks:
230
231.. testsetup:: chunked-download-end
232
233 import mock
234 import requests
235 import http.client
236
237 from google.resumable_media.requests import ChunkedDownload
238
239 media_url = 'http://test.invalid'
240
241 fifty_mb = 50 * 1024 * 1024
242 one_gb = 1024 * 1024 * 1024
243 stream = mock.Mock(spec=['write'])
244 download = ChunkedDownload(media_url, fifty_mb, stream)
245 download._bytes_downloaded = 20 * fifty_mb
246 download._total_bytes = one_gb
247
248 fake_response = requests.Response()
249 fake_response.status_code = int(http.client.PARTIAL_CONTENT)
250 slice_size = one_gb - 20 * fifty_mb
251 fake_response.headers['Content-Length'] = '{:d}'.format(slice_size)
252 content_range = 'bytes {:d}-{:d}/{:d}'.format(
253 20 * fifty_mb, one_gb - 1, one_gb)
254 fake_response.headers['Content-Range'] = content_range
255 fake_content = mock.MagicMock(spec=['__len__'])
256 fake_content.__len__.return_value = slice_size
257 fake_response._content = fake_content
258
259 get_method = mock.Mock(return_value=fake_response, spec=[])
260 transport = mock.Mock(request=get_method, spec=['request'])
261
262.. doctest:: chunked-download-end
263
264 >>> # The state of the download in progress.
265 >>> download.finished
266 False
267 >>> download.bytes_downloaded # 20 chunks at 50MB
268 1048576000
269 >>> download.total_bytes # 1GB
270 1073741824
271 >>> response = download.consume_next_chunk(transport)
272 >>> # The state of the download after consuming the final chunk.
273 >>> download.finished
274 True
275 >>> download.bytes_downloaded == download.total_bytes
276 True
277 >>> response
278 <Response [206]>
279 >>> response.headers['Content-Length']
280 '25165824'
281 >>> response.headers['Content-Range']
282 'bytes 1048576000-1073741823/1073741824'
283 >>> len(response.content) < download.chunk_size
284 True
285
286In addition, a :class:`.ChunkedDownload` can also take optional
287``start`` and ``end`` byte positions.
288
289Usually, no checksum is returned with a chunked download. Even if one is returned,
290it is not validated. If you need to validate the checksum, you can do so
291by buffering the chunks and validating the checksum against the completed download.
292
293==============
294Simple Uploads
295==============
296
297Among the three supported upload classes, the simplest is
298:class:`.SimpleUpload`. A simple upload should be used when the resource
299being uploaded is small and when there is no metadata (other than the name)
300associated with the resource.
301
302.. testsetup:: simple-upload
303
304 import json
305
306 import mock
307 import requests
308 import http.client
309
310 bucket = 'some-bucket'
311 blob_name = 'file.txt'
312
313 fake_response = requests.Response()
314 fake_response.status_code = int(http.client.OK)
315 payload = {
316 'bucket': bucket,
317 'contentType': 'text/plain',
318 'md5Hash': 'M0XLEsX9/sMdiI+4pB4CAQ==',
319 'name': blob_name,
320 'size': '27',
321 }
322 fake_response._content = json.dumps(payload).encode('utf-8')
323
324 post_method = mock.Mock(return_value=fake_response, spec=[])
325 transport = mock.Mock(request=post_method, spec=['request'])
326
327.. doctest:: simple-upload
328 :options: +NORMALIZE_WHITESPACE
329
330 >>> from google.resumable_media.requests import SimpleUpload
331 >>>
332 >>> url_template = (
333 ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
334 ... 'uploadType=media&'
335 ... 'name={blob_name}')
336 >>> upload_url = url_template.format(
337 ... bucket=bucket, blob_name=blob_name)
338 >>>
339 >>> upload = SimpleUpload(upload_url)
340 >>> data = b'Some not too large content.'
341 >>> content_type = 'text/plain'
342 >>> response = upload.transmit(transport, data, content_type)
343 >>> upload.finished
344 True
345 >>> response
346 <Response [200]>
347 >>> json_response = response.json()
348 >>> json_response['bucket'] == bucket
349 True
350 >>> json_response['name'] == blob_name
351 True
352 >>> json_response['contentType'] == content_type
353 True
354 >>> json_response['md5Hash']
355 'M0XLEsX9/sMdiI+4pB4CAQ=='
356 >>> int(json_response['size']) == len(data)
357 True
358
359In the rare case that an upload fails, an :exc:`.InvalidResponse`
360will be raised:
361
362.. testsetup:: simple-upload-fail
363
364 import time
365
366 import mock
367 import requests
368 import http.client
369
370 from google import resumable_media
371 from google.resumable_media import _helpers
372 from google.resumable_media.requests import SimpleUpload as constructor
373
374 upload_url = 'http://test.invalid'
375 data = b'Some not too large content.'
376 content_type = 'text/plain'
377
378 fake_response = requests.Response()
379 fake_response.status_code = int(http.client.SERVICE_UNAVAILABLE)
380
381 post_method = mock.Mock(return_value=fake_response, spec=[])
382 transport = mock.Mock(request=post_method, spec=['request'])
383
384 time_sleep = time.sleep
385 def dont_sleep(seconds):
386 raise RuntimeError('No sleep', seconds)
387
388 def SimpleUpload(*args, **kwargs):
389 upload = constructor(*args, **kwargs)
390 # Mock the cumulative sleep to avoid retries (and `time.sleep()`).
391 upload._retry_strategy = resumable_media.RetryStrategy(
392 max_cumulative_retry=-1.0)
393 return upload
394
395 time.sleep = dont_sleep
396
397.. doctest:: simple-upload-fail
398 :options: +NORMALIZE_WHITESPACE
399
400 >>> upload = SimpleUpload(upload_url)
401 >>> error = None
402 >>> try:
403 ... upload.transmit(transport, data, content_type)
404 ... except resumable_media.InvalidResponse as caught_exc:
405 ... error = caught_exc
406 ...
407 >>> error
408 InvalidResponse('Request failed with status code', 503,
409 'Expected one of', <HTTPStatus.OK: 200>)
410 >>> error.response
411 <Response [503]>
412 >>>
413 >>> upload.finished
414 True
415
416.. testcleanup:: simple-upload-fail
417
418 # Put back the correct ``sleep`` function on the ``time`` module.
419 time.sleep = time_sleep
420
421Even in the case of failure, we see that the upload is
422:attr:`~.SimpleUpload.finished`, i.e. it cannot be re-used.
423
424=================
425Multipart Uploads
426=================
427
428After the simple upload, the :class:`.MultipartUpload` can be used to
429achieve essentially the same task. However, a multipart upload allows some
430metadata about the resource to be sent along as well. (This is the "multi":
431we send a first part with the metadata and a second part with the actual
432bytes in the resource.)
433
434Usage is similar to the simple upload, but :meth:`~.MultipartUpload.transmit`
435accepts an extra required argument: ``metadata``.
436
437.. testsetup:: multipart-upload
438
439 import json
440
441 import mock
442 import requests
443 import http.client
444
445 bucket = 'some-bucket'
446 blob_name = 'file.txt'
447 data = b'Some not too large content.'
448 content_type = 'text/plain'
449
450 fake_response = requests.Response()
451 fake_response.status_code = int(http.client.OK)
452 payload = {
453 'bucket': bucket,
454 'name': blob_name,
455 'metadata': {'color': 'grurple'},
456 }
457 fake_response._content = json.dumps(payload).encode('utf-8')
458
459 post_method = mock.Mock(return_value=fake_response, spec=[])
460 transport = mock.Mock(request=post_method, spec=['request'])
461
462.. doctest:: multipart-upload
463
464 >>> from google.resumable_media.requests import MultipartUpload
465 >>>
466 >>> url_template = (
467 ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
468 ... 'uploadType=multipart')
469 >>> upload_url = url_template.format(bucket=bucket)
470 >>>
471 >>> upload = MultipartUpload(upload_url)
472 >>> metadata = {
473 ... 'name': blob_name,
474 ... 'metadata': {
475 ... 'color': 'grurple',
476 ... },
477 ... }
478 >>> response = upload.transmit(transport, data, metadata, content_type)
479 >>> upload.finished
480 True
481 >>> response
482 <Response [200]>
483 >>> json_response = response.json()
484 >>> json_response['bucket'] == bucket
485 True
486 >>> json_response['name'] == blob_name
487 True
488 >>> json_response['metadata'] == metadata['metadata']
489 True
490
As with the simple upload, in the case of failure an :exc:`.InvalidResponse`
is raised, enclosing the :attr:`~.InvalidResponse.response` that caused
the failure, and the ``upload`` object cannot be re-used afterwards.
494
495=================
496Resumable Uploads
497=================
498
499A :class:`.ResumableUpload` deviates from the other two upload classes:
500it transmits a resource over the course of multiple requests. This
501is intended to be used in cases where:
502
503* the size of the resource is not known (i.e. it is generated on the fly)
504* requests must be short-lived
505* the client has request **size** limitations
506* the resource is too large to fit into memory
507
508In general, a resource should be sent in a **single** request to avoid
509latency and reduce QPS. See `GCS best practices`_ for more things to
510consider when using a resumable upload.
511
512.. _GCS best practices: https://cloud.google.com/storage/docs/\
513 best-practices#uploading
514
515After creating a :class:`.ResumableUpload` instance, a
516**resumable upload session** must be initiated to let the server know that
517a series of chunked upload requests will be coming and to obtain an
518``upload_id`` for the session. In contrast to the other two upload classes,
519:meth:`~.ResumableUpload.initiate` takes a byte ``stream`` as input rather
520than raw bytes as ``data``. This can be a file object, a :class:`~io.BytesIO`
521object or any other stream implementing the same interface.
522
523.. testsetup:: resumable-initiate
524
525 import io
526
527 import mock
528 import requests
529 import http.client
530
531 bucket = 'some-bucket'
532 blob_name = 'file.txt'
533 data = b'Some resumable bytes.'
534 content_type = 'text/plain'
535
536 fake_response = requests.Response()
537 fake_response.status_code = int(http.client.OK)
538 fake_response._content = b''
539 upload_id = 'ABCdef189XY_super_serious'
540 resumable_url_template = (
541 'https://www.googleapis.com/upload/storage/v1/b/{bucket}'
542 '/o?uploadType=resumable&upload_id={upload_id}')
543 resumable_url = resumable_url_template.format(
544 bucket=bucket, upload_id=upload_id)
545 fake_response.headers['location'] = resumable_url
546 fake_response.headers['x-guploader-uploadid'] = upload_id
547
548 post_method = mock.Mock(return_value=fake_response, spec=[])
549 transport = mock.Mock(request=post_method, spec=['request'])
550
551.. doctest:: resumable-initiate
552
553 >>> from google.resumable_media.requests import ResumableUpload
554 >>>
555 >>> url_template = (
556 ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
557 ... 'uploadType=resumable')
558 >>> upload_url = url_template.format(bucket=bucket)
559 >>>
560 >>> chunk_size = 1024 * 1024 # 1MB
561 >>> upload = ResumableUpload(upload_url, chunk_size)
562 >>> stream = io.BytesIO(data)
563 >>> # The upload doesn't know how "big" it is until seeing a stream.
564 >>> upload.total_bytes is None
565 True
566 >>> metadata = {'name': blob_name}
567 >>> response = upload.initiate(transport, stream, metadata, content_type)
568 >>> response
569 <Response [200]>
570 >>> upload.resumable_url == response.headers['Location']
571 True
572 >>> upload.total_bytes == len(data)
573 True
574 >>> upload_id = response.headers['X-GUploader-UploadID']
575 >>> upload_id
576 'ABCdef189XY_super_serious'
577 >>> upload.resumable_url == upload_url + '&upload_id=' + upload_id
578 True
579
580Once a :class:`.ResumableUpload` has been initiated, the resource is
581transmitted in chunks until completion:
582
583.. testsetup:: resumable-transmit
584
585 import io
586 import json
587
588 import mock
589 import requests
590 import http.client
591
592 from google import resumable_media
593 import google.resumable_media.requests.upload as upload_mod
594
595 data = b'01234567891'
596 stream = io.BytesIO(data)
597 # Create an "already initiated" upload.
598 upload_url = 'http://test.invalid'
599 chunk_size = 256 * 1024 # 256KB
600 upload = upload_mod.ResumableUpload(upload_url, chunk_size)
601 upload._resumable_url = 'http://test.invalid?upload_id=mocked'
602 upload._stream = stream
603 upload._content_type = 'text/plain'
604 upload._total_bytes = len(data)
605
606 # After-the-fact update the chunk size so that len(data)
607 # is split into three.
608 upload._chunk_size = 4
609 # Make three fake responses.
610 fake_response0 = requests.Response()
611 fake_response0.status_code = http.client.PERMANENT_REDIRECT
612 fake_response0.headers['range'] = 'bytes=0-3'
613
614 fake_response1 = requests.Response()
615 fake_response1.status_code = http.client.PERMANENT_REDIRECT
616 fake_response1.headers['range'] = 'bytes=0-7'
617
618 fake_response2 = requests.Response()
619 fake_response2.status_code = int(http.client.OK)
620 bucket = 'some-bucket'
621 blob_name = 'file.txt'
622 payload = {
623 'bucket': bucket,
624 'name': blob_name,
625 'size': '{:d}'.format(len(data)),
626 }
627 fake_response2._content = json.dumps(payload).encode('utf-8')
628
629 # Use the fake responses to mock a transport.
630 responses = [fake_response0, fake_response1, fake_response2]
631 put_method = mock.Mock(side_effect=responses, spec=[])
632 transport = mock.Mock(request=put_method, spec=['request'])
633
634.. doctest:: resumable-transmit
635
636 >>> response0 = upload.transmit_next_chunk(transport)
637 >>> response0
638 <Response [308]>
639 >>> upload.finished
640 False
641 >>> upload.bytes_uploaded == upload.chunk_size
642 True
643 >>>
644 >>> response1 = upload.transmit_next_chunk(transport)
645 >>> response1
646 <Response [308]>
647 >>> upload.finished
648 False
649 >>> upload.bytes_uploaded == 2 * upload.chunk_size
650 True
651 >>>
652 >>> response2 = upload.transmit_next_chunk(transport)
653 >>> response2
654 <Response [200]>
655 >>> upload.finished
656 True
657 >>> upload.bytes_uploaded == upload.total_bytes
658 True
659 >>> json_response = response2.json()
660 >>> json_response['bucket'] == bucket
661 True
662 >>> json_response['name'] == blob_name
663 True
664"""
665from google.resumable_media.requests.download import ChunkedDownload
666from google.resumable_media.requests.download import Download
667from google.resumable_media.requests.upload import MultipartUpload
668from google.resumable_media.requests.download import RawChunkedDownload
669from google.resumable_media.requests.download import RawDownload
670from google.resumable_media.requests.upload import ResumableUpload
671from google.resumable_media.requests.upload import SimpleUpload
672from google.resumable_media.requests.upload import XMLMPUContainer
673from google.resumable_media.requests.upload import XMLMPUPart
674
# Public API of this sub-package: each download / upload class imported
# above is re-exported here by name, so ``from ... import *`` exposes exactly
# these nine classes. Keep this list in sync with the imports.
__all__ = [
    "ChunkedDownload",
    "Download",
    "MultipartUpload",
    "RawChunkedDownload",
    "RawDownload",
    "ResumableUpload",
    "SimpleUpload",
    "XMLMPUContainer",
    "XMLMPUPart",
]