Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/storage/blob.py: 30%
675 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:17 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:17 +0000
1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15# pylint: disable=too-many-lines
17"""Create / interact with Google Cloud Storage blobs.
18"""
20import base64
21import copy
22import hashlib
23from io import BytesIO
24from io import TextIOWrapper
25import logging
26import mimetypes
27import os
28import re
29from email.parser import HeaderParser
30from urllib.parse import parse_qsl
31from urllib.parse import quote
32from urllib.parse import urlencode
33from urllib.parse import urlsplit
34from urllib.parse import urlunsplit
35import warnings
37from google import resumable_media
38from google.resumable_media.requests import ChunkedDownload
39from google.resumable_media.requests import Download
40from google.resumable_media.requests import RawDownload
41from google.resumable_media.requests import RawChunkedDownload
42from google.resumable_media.requests import MultipartUpload
43from google.resumable_media.requests import ResumableUpload
45from google.api_core.iam import Policy
46from google.cloud import exceptions
47from google.cloud._helpers import _bytes_to_unicode
48from google.cloud._helpers import _datetime_to_rfc3339
49from google.cloud._helpers import _rfc3339_nanos_to_datetime
50from google.cloud._helpers import _to_bytes
51from google.cloud.exceptions import NotFound
52from google.cloud.storage._helpers import _add_etag_match_headers
53from google.cloud.storage._helpers import _add_generation_match_parameters
54from google.cloud.storage._helpers import _PropertyMixin
55from google.cloud.storage._helpers import _scalar_property
56from google.cloud.storage._helpers import _bucket_bound_hostname_url
57from google.cloud.storage._helpers import _raise_if_more_than_one_set
58from google.cloud.storage._helpers import _api_core_retry_to_resumable_media_retry
59from google.cloud.storage._helpers import _get_default_headers
60from google.cloud.storage._signing import generate_signed_url_v2
61from google.cloud.storage._signing import generate_signed_url_v4
62from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE
63from google.cloud.storage._helpers import _DEFAULT_STORAGE_HOST
64from google.cloud.storage._helpers import _API_VERSION
65from google.cloud.storage.acl import ACL
66from google.cloud.storage.acl import ObjectACL
67from google.cloud.storage.constants import _DEFAULT_TIMEOUT
68from google.cloud.storage.constants import ARCHIVE_STORAGE_CLASS
69from google.cloud.storage.constants import COLDLINE_STORAGE_CLASS
70from google.cloud.storage.constants import MULTI_REGIONAL_LEGACY_STORAGE_CLASS
71from google.cloud.storage.constants import NEARLINE_STORAGE_CLASS
72from google.cloud.storage.constants import REGIONAL_LEGACY_STORAGE_CLASS
73from google.cloud.storage.constants import STANDARD_STORAGE_CLASS
74from google.cloud.storage.retry import ConditionalRetryPolicy
75from google.cloud.storage.retry import DEFAULT_RETRY
76from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON
77from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED
78from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
79from google.cloud.storage.fileio import BlobReader
80from google.cloud.storage.fileio import BlobWriter
# Base endpoint used for public / signed URLs.
_API_ACCESS_ENDPOINT = _DEFAULT_STORAGE_HOST
_DEFAULT_CONTENT_TYPE = "application/octet-stream"

# URL templates; the ``{...}`` placeholders are filled in with ``str.format``.
_DOWNLOAD_URL_TEMPLATE = "{hostname}/download/storage/{api_version}{path}?alt=media"
_BASE_UPLOAD_TEMPLATE = (
    "{hostname}/upload/storage/{api_version}{bucket_path}/o?uploadType="
)
_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"

_CONTENT_TYPE_FIELD = "contentType"
# NOTE: "acl" is also writeable but we defer ACL management to
#       the classes in the google.cloud.storage.acl module.
_WRITABLE_FIELDS = (
    "cacheControl",
    "contentDisposition",
    "contentEncoding",
    "contentLanguage",
    _CONTENT_TYPE_FIELD,
    "crc32c",
    "customTime",
    "md5Hash",
    "metadata",
    "name",
    "storageClass",
)

# Message templates (errors, deprecation warnings, log lines).
_READ_LESS_THAN_SIZE = (
    "Size {:d} was specified but the file-like object only had {:d} bytes remaining."
)
_CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE = (
    "A checksum of type `{}` was requested, but checksumming is not available "
    "for downloads when chunk_size is set."
)
_COMPOSE_IF_GENERATION_LIST_DEPRECATED = (
    "'if_generation_match: type list' is deprecated and supported for "
    "backwards-compatibility reasons only. Use 'if_source_generation_match' "
    "instead to match source objects' generations."
)
_COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR = (
    "Use 'if_generation_match' to match the generation of the destination "
    "object by passing in a generation number, instead of a list. "
    "Use 'if_source_generation_match' to match source objects' generations."
)
_COMPOSE_IF_METAGENERATION_LIST_DEPRECATED = (
    "'if_metageneration_match: type list' is deprecated and supported for "
    "backwards-compatibility reasons only. Note that the metageneration to "
    "be matched is that of the destination blob. Please pass in a single "
    "value (type long)."
)
_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR = (
    "'if_source_generation_match' length must be the same as 'sources' length"
)
_DOWNLOAD_AS_STRING_DEPRECATED = (
    "Blob.download_as_string() is deprecated and will be removed in future. "
    "Use Blob.download_as_bytes() instead."
)

_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB (104857600 bytes)
_MAX_MULTIPART_SIZE = 8 * 1024 * 1024  # 8 MB (8388608 bytes)

_logger = logging.getLogger(__name__)
145class Blob(_PropertyMixin):
146 """A wrapper around Cloud Storage's concept of an ``Object``.
148 :type name: str
149 :param name: The name of the blob. This corresponds to the unique path of
150 the object in the bucket. If bytes, will be converted to a
151 unicode object. Blob / object names can contain any sequence
152 of valid unicode characters, of length 1-1024 bytes when
153 UTF-8 encoded.
155 :type bucket: :class:`google.cloud.storage.bucket.Bucket`
156 :param bucket: The bucket to which this blob belongs.
158 :type chunk_size: int
159 :param chunk_size:
160 (Optional) The size of a chunk of data whenever iterating (in bytes).
161 This must be a multiple of 256 KB per the API specification. If not
162 specified, the chunk_size of the blob itself is used. If that is not
163 specified, a default value of 40 MB is used.
165 :type encryption_key: bytes
166 :param encryption_key:
167 (Optional) 32 byte encryption key for customer-supplied encryption.
168 See https://cloud.google.com/storage/docs/encryption#customer-supplied.
170 :type kms_key_name: str
171 :param kms_key_name:
172 (Optional) Resource name of Cloud KMS key used to encrypt the blob's
173 contents.
175 :type generation: long
176 :param generation:
177 (Optional) If present, selects a specific revision of this object.
178 """
180 _chunk_size = None # Default value for each instance.
181 _CHUNK_SIZE_MULTIPLE = 256 * 1024
182 """Number (256 KB, in bytes) that must divide the chunk size."""
184 STORAGE_CLASSES = (
185 STANDARD_STORAGE_CLASS,
186 NEARLINE_STORAGE_CLASS,
187 COLDLINE_STORAGE_CLASS,
188 ARCHIVE_STORAGE_CLASS,
189 MULTI_REGIONAL_LEGACY_STORAGE_CLASS,
190 REGIONAL_LEGACY_STORAGE_CLASS,
191 )
192 """Allowed values for :attr:`storage_class`.
194 See
195 https://cloud.google.com/storage/docs/json_api/v1/objects#storageClass
196 https://cloud.google.com/storage/docs/per-object-storage-class
198 .. note::
199 This list does not include 'DURABLE_REDUCED_AVAILABILITY', which
200 is only documented for buckets (and deprecated).
201 """
def __init__(
    self,
    name,
    bucket,
    chunk_size=None,
    encryption_key=None,
    kms_key_name=None,
    generation=None,
):
    """Initialise a blob bound to ``bucket``.

    :type name: str or bytes
    :param name: The blob name; passed through ``_bytes_to_unicode``.

    :type bucket: :class:`google.cloud.storage.bucket.Bucket`
    :param bucket: The bucket to which this blob belongs.

    :type chunk_size: int
    :param chunk_size:
        (Optional) Default chunk size; validated by the ``chunk_size``
        setter.

    :type encryption_key: bytes
    :param encryption_key:
        (Optional) Customer-supplied encryption key. Checked together with
        ``kms_key_name`` by ``_raise_if_more_than_one_set``.

    :type kms_key_name: str
    :param kms_key_name:
        (Optional) Stored under the ``kmsKeyName`` property when given.

    :type generation: long
    :param generation:
        (Optional) Stored under the ``generation`` property when given.
    """
    super(Blob, self).__init__(name=_bytes_to_unicode(name))

    # Assigning through the property runs the setter's validation.
    self.chunk_size = chunk_size
    self._bucket = bucket
    self._acl = ObjectACL(self)

    _raise_if_more_than_one_set(
        encryption_key=encryption_key, kms_key_name=kms_key_name
    )
    self._encryption_key = encryption_key

    # Only record the optional properties that were actually supplied.
    optional_properties = (
        ("kmsKeyName", kms_key_name),
        ("generation", generation),
    )
    for property_name, property_value in optional_properties:
        if property_value is not None:
            self._properties[property_name] = property_value
@property
def bucket(self):
    """The bucket this blob was created with.

    :rtype: :class:`~google.cloud.storage.bucket.Bucket`
    :returns: The value stored in ``self._bucket``.
    """
    owning_bucket = self._bucket
    return owning_bucket
@property
def chunk_size(self):
    """The blob's default chunk size.

    :rtype: int or ``NoneType``
    :returns: The configured chunk size, or ``None`` when unset.
    """
    return self._chunk_size


@chunk_size.setter
def chunk_size(self, value):
    """Validate and store the blob's default chunk size.

    :type value: int
    :param value:
        (Optional) New chunk size. ``None`` and non-positive values are
        stored without validation.

    :raises: :class:`ValueError` if ``value`` is positive and is not a
             multiple of 256 KB.
    """
    multiple = self._CHUNK_SIZE_MULTIPLE
    subject_to_check = value is not None and value > 0
    if subject_to_check and value % multiple != 0:
        raise ValueError("Chunk size must be a multiple of %d." % (multiple,))
    self._chunk_size = value
@property
def encryption_key(self):
    """The customer-supplied encryption key held by this blob.

    :rtype: bytes or ``NoneType``
    :returns:
        The key stored on this instance, or ``None`` if no
        customer-supplied encryption key was used, or the blob's resource
        has not been loaded from the server.
    """
    return self._encryption_key


@encryption_key.setter
def encryption_key(self, value):
    """Replace the blob's customer-supplied encryption key.

    See https://cloud.google.com/storage/docs/encryption#customer-supplied

    To perform a key rotation for an encrypted blob, use :meth:`rewrite`.
    See https://cloud.google.com/storage/docs/encryption/using-customer-supplied-keys?hl=ca#rotating

    :type value: bytes
    :param value: 32 byte encryption key for customer-supplied encryption.
    """
    # No validation happens here; the value is stored as given.
    self._encryption_key = value
@staticmethod
def path_helper(bucket_path, blob_name):
    """Build the relative URL path of a blob inside a bucket.

    :type bucket_path: str
    :param bucket_path: The URL path for a bucket.

    :type blob_name: str
    :param blob_name: The name of the blob; it is passed through ``_quote``.

    :rtype: str
    :returns: ``bucket_path``, the ``/o/`` separator and the quoted name.
    """
    return "/o/".join((bucket_path, _quote(blob_name)))
@property
def acl(self):
    """The ACL helper object stored on this blob (``self._acl``)."""
    object_acl = self._acl
    return object_acl
def __repr__(self):
    """Return ``<Blob: bucket-name, blob-name, generation>``.

    The bucket name is shown as ``None`` when the blob's bucket is falsy.
    """
    bucket_name = self.bucket.name if self.bucket else None
    return f"<Blob: {bucket_name}, {self.name}, {self.generation}>"
@property
def path(self):
    """The URL path to this blob.

    :rtype: str
    :returns: The result of :meth:`path_helper` for the bucket's path and
              this blob's name.

    :raises: :class:`ValueError` if the blob has no (or an empty) name.
    """
    if self.name:
        return self.path_helper(self.bucket.path, self.name)

    raise ValueError("Cannot determine path without a blob name.")
@property
def client(self):
    """The client of the bucket this blob belongs to."""
    owning_bucket = self.bucket
    return owning_bucket.client
@property
def user_project(self):
    """Project ID billed for API requests made via this blob.

    The value is read from the blob's bucket.

    :rtype: str
    """
    owning_bucket = self.bucket
    return owning_bucket.user_project
def _encryption_headers(self):
    """Return any encryption headers needed to fetch the object.

    Delegates to ``_get_encryption_headers`` with this blob's
    customer-supplied key.

    :returns:
        Whatever ``_get_encryption_headers`` produces. NOTE(review):
        ``generate_signed_url`` indexes that helper's result by header name
        and passes it to ``dict.update``, so it appears to be a mapping --
        confirm against the helper.
    """
    customer_key = self._encryption_key
    return _get_encryption_headers(customer_key)
@property
def _query_params(self):
    """Default query parameters.

    :rtype: dict
    :returns:
        A new dict holding ``generation`` and/or ``userProject`` for
        whichever of the two is not ``None``.
    """
    candidates = (
        ("generation", self.generation),
        ("userProject", self.user_project),
    )
    return {key: value for key, value in candidates if value is not None}
@property
def public_url(self):
    """The public URL for this blob.

    Use :meth:`make_public` to enable anonymous access via the returned
    URL.

    :rtype: `string`
    :returns: ``<storage base URL>/<bucket name>/<quoted blob name>``.
    """
    bucket_name = self.bucket.name
    # "/" and "~" are left unescaped in the object name.
    quoted_name = _quote(self.name, safe=b"/~")
    return f"{_API_ACCESS_ENDPOINT}/{bucket_name}/{quoted_name}"
@classmethod
def from_string(cls, uri, client=None):
    """Construct a blob object from a ``gs://`` URI.

    .. code-block:: python

        from google.cloud import storage
        from google.cloud.storage.blob import Blob
        client = storage.Client()
        blob = Blob.from_string("gs://bucket/object", client=client)

    :type uri: str
    :param uri: The blob URI, in the form ``gs://<bucket>/<object>``.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use.  Application code should
        *always* pass ``client``.

    :rtype: :class:`google.cloud.storage.blob.Blob`
    :returns: The blob object created.

    :raises: :class:`ValueError` if the URI scheme is not ``gs``.
    """
    # Imported here rather than at module level.
    from google.cloud.storage.bucket import Bucket

    parts = urlsplit(uri)
    if parts.scheme != "gs":
        raise ValueError("URI scheme must be gs")

    # The URI's network location is the bucket name; the path minus its
    # leading "/" is the object name.
    parent_bucket = Bucket(client, name=parts.netloc)
    object_name = parts.path[1:]
    return cls(object_name, parent_bucket)
def generate_signed_url(
    self,
    expiration=None,
    api_access_endpoint=_API_ACCESS_ENDPOINT,
    method="GET",
    content_md5=None,
    content_type=None,
    response_disposition=None,
    response_type=None,
    generation=None,
    headers=None,
    query_parameters=None,
    client=None,
    credentials=None,
    version=None,
    service_account_email=None,
    access_token=None,
    virtual_hosted_style=False,
    bucket_bound_hostname=None,
    scheme="http",
):
    """Generates a signed URL for this blob.

    .. note::

        If you are on Google Compute Engine, you can't generate a signed
        URL using GCE service account.
        If you'd like to be able to generate a signed URL from GCE,
        you can use a standard service account from a JSON file rather
        than a GCE service account.

    If you have a blob that you want to allow access to for a set
    amount of time, you can use this method to generate a URL that
    is only valid within a certain time period.

    See a [code sample](https://cloud.google.com/storage/docs/samples/storage-generate-signed-url-v4#storage_generate_signed_url_v4-python).

    This is particularly useful if you don't want publicly
    accessible blobs, but don't want to require users to explicitly
    log in.

    If ``bucket_bound_hostname`` is set as an argument of :attr:`api_access_endpoint`,
    ``https`` works only if using a ``CDN``.

    If the blob has a customer-supplied encryption key, the matching
    encryption header(s) are added to the signed headers. The ``headers``
    dict passed by the caller is copied first and is never modified.

    :type expiration: Union[Integer, datetime.datetime, datetime.timedelta]
    :param expiration:
        Point in time when the signed URL should expire. If a ``datetime``
        instance is passed without an explicit ``tzinfo`` set,  it will be
        assumed to be ``UTC``.

    :type api_access_endpoint: str
    :param api_access_endpoint:
        (Optional) URI base. Replaced when ``virtual_hosted_style`` or
        ``bucket_bound_hostname`` is used.

    :type method: str
    :param method:
        The HTTP verb that will be used when requesting the URL. It is
        upper-cased before signing.

    :type content_md5: str
    :param content_md5:
        (Optional) The MD5 hash of the object referenced by ``resource``.

    :type content_type: str
    :param content_type:
        (Optional) The content type of the object referenced by
        ``resource``.

    :type response_disposition: str
    :param response_disposition:
        (Optional) Content disposition of responses to requests for the
        signed URL.  For example, to enable the signed URL to initiate a
        download of ``blob.png``, use the value ``'attachment;
        filename=blob.png'``.

    :type response_type: str
    :param response_type:
        (Optional) Content type of responses to requests for the signed
        URL. Ignored if content_type is set on object/blob metadata.

    :type generation: str
    :param generation:
        (Optional) A value that indicates which generation of the resource
        to fetch.

    :type headers: dict
    :param headers:
        (Optional) Additional HTTP headers to be included as part of the
        signed URLs. See:
        https://cloud.google.com/storage/docs/xml-api/reference-headers
        Requests using the signed URL *must* pass the specified header
        (name and value) with each request for the URL.

    :type query_parameters: dict
    :param query_parameters:
        (Optional) Additional query parameters to be included as part of the
        signed URLs. See:
        https://cloud.google.com/storage/docs/xml-api/reference-headers#query

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket. Only consulted when
        ``credentials`` is not given.

    :type credentials: :class:`google.auth.credentials.Credentials`
    :param credentials:
        (Optional) The authorization credentials to attach to requests.
        These credentials identify this application to the service. If
        none are specified, the client's credentials are used.

    :type version: str
    :param version:
        (Optional) The version of signed credential to create. Must be one
        of 'v2' | 'v4'. Defaults to 'v2' when not given.

    :type service_account_email: str
    :param service_account_email:
        (Optional) E-mail address of the service account.

    :type access_token: str
    :param access_token: (Optional) Access token for a service account.

    :type virtual_hosted_style: bool
    :param virtual_hosted_style:
        (Optional) If true, then construct the URL relative to the bucket's
        virtual hostname, e.g., '<bucket-name>.storage.googleapis.com'.

    :type bucket_bound_hostname: str
    :param bucket_bound_hostname:
        (Optional) If passed, then construct the URL relative to the
        bucket-bound hostname. Value can be a bare hostname or include a
        scheme, e.g., 'example.com' or 'http://example.com'. Ignored when
        ``virtual_hosted_style`` is true. See:
        https://cloud.google.com/storage/docs/request-endpoints#cname

    :type scheme: str
    :param scheme:
        (Optional) If ``bucket_bound_hostname`` is passed as a bare
        hostname, use this value as the scheme. ``https`` will work only
        when using a CDN. Defaults to ``"http"``.

    :raises: :exc:`ValueError` when version is invalid.
    :raises: :exc:`TypeError` when expiration is not a valid type.
    :raises: :exc:`AttributeError` if credentials is not an instance
            of :class:`google.auth.credentials.Signing`.

    :rtype: str
    :returns: A signed URL you can use to access the resource
              until expiration.
    """
    if version is None:
        version = "v2"
    elif version not in ("v2", "v4"):
        raise ValueError("'version' must be either 'v2' or 'v4'")

    quoted_name = _quote(self.name, safe=b"/~")

    # With a bucket-specific hostname the bucket is already identified by
    # the host, so the signed resource is just the object name; otherwise
    # the resource is prefixed with the bucket name.
    if virtual_hosted_style:
        api_access_endpoint = f"https://{self.bucket.name}.storage.googleapis.com"
        resource = f"/{quoted_name}"
    elif bucket_bound_hostname:
        api_access_endpoint = _bucket_bound_hostname_url(
            bucket_bound_hostname, scheme
        )
        resource = f"/{quoted_name}"
    else:
        resource = f"/{self.bucket.name}/{quoted_name}"

    # If you are on Google Compute Engine, you can't generate a signed URL
    # using GCE service account.
    # See https://github.com/googleapis/google-auth-library-python/issues/50
    if credentials is None:
        client = self._require_client(client)
        credentials = client._credentials

    helper = generate_signed_url_v2 if version == "v2" else generate_signed_url_v4

    if self._encryption_key is not None:
        encryption_headers = _get_encryption_headers(self._encryption_key)
        # Copy before adding entries so the caller's dict is left untouched.
        headers = dict(headers) if headers is not None else {}
        if version == "v2":
            # See: https://cloud.google.com/storage/docs/access-control/signed-urls-v2#about-canonical-extension-headers
            v2_copy_only = "X-Goog-Encryption-Algorithm"
            headers[v2_copy_only] = encryption_headers[v2_copy_only]
        else:
            headers.update(encryption_headers)

    return helper(
        credentials,
        resource=resource,
        expiration=expiration,
        api_access_endpoint=api_access_endpoint,
        method=method.upper(),
        content_md5=content_md5,
        content_type=content_type,
        response_type=response_type,
        response_disposition=response_disposition,
        generation=generation,
        headers=headers,
        query_parameters=query_parameters,
        service_account_email=service_account_email,
        access_token=access_token,
    )
def exists(
    self,
    client=None,
    if_etag_match=None,
    if_etag_not_match=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    retry=DEFAULT_RETRY,
):
    """Determines whether or not this blob exists.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type if_etag_match: Union[str, Set[str]]
    :param if_etag_match:
        (Optional) See :ref:`using-if-etag-match`

    :type if_etag_not_match: Union[str, Set[str]]
    :param if_etag_not_match:
        (Optional) See :ref:`using-if-etag-not-match`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`

    :rtype: bool
    :returns: True if the blob exists in Cloud Storage.
    """
    client = self._require_client(client)

    # Only the outcome of the request matters, so ask for a single field
    # to keep the returned payload small.
    params = self._query_params
    params["fields"] = "name"

    generation_preconditions = {
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
    }
    _add_generation_match_parameters(params, **generation_preconditions)

    request_headers = {}
    _add_etag_match_headers(
        request_headers,
        if_etag_match=if_etag_match,
        if_etag_not_match=if_etag_not_match,
    )

    try:
        # `_target_object=None` is deliberate: with fields=name the
        # response would otherwise limit the local properties.
        client._get_resource(
            self.path,
            query_params=params,
            headers=request_headers,
            timeout=timeout,
            retry=retry,
            _target_object=None,
        )
    except NotFound:
        # NOTE: This will not fail immediately in a batch. However, when
        #       Batch.finish() is called, the resulting `NotFound` will be
        #       raised.
        return False
    else:
        return True
def delete(
    self,
    client=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
):
    """Deletes a blob from Cloud Storage.

    The work is delegated to the bucket's ``delete_blob`` method, using
    this blob's name and generation.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`

    :raises: :class:`google.cloud.exceptions.NotFound`
             (propagated from
             :meth:`google.cloud.storage.bucket.Bucket.delete_blob`).
    """
    owning_bucket = self.bucket
    blob_name = self.name
    blob_generation = self.generation

    owning_bucket.delete_blob(
        blob_name,
        client=client,
        generation=blob_generation,
        timeout=timeout,
        if_generation_match=if_generation_match,
        if_generation_not_match=if_generation_not_match,
        if_metageneration_match=if_metageneration_match,
        if_metageneration_not_match=if_metageneration_not_match,
        retry=retry,
    )
def _get_transport(self, client):
    """Return the client's transport.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. It is resolved through
        ``self._require_client`` before its transport is read.

    :rtype transport:
        :class:`~google.auth.transport.requests.AuthorizedSession`
    :returns: The transport (with credentials) that will
              make authenticated requests.
    """
    return self._require_client(client)._http
def _get_download_url(
    self,
    client,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
):
    """Get the download URL for the current blob.

    If the ``media_link`` has been loaded, it will be used, otherwise
    the URL will be constructed from the current blob's path (and possibly
    generation) to avoid a round trip.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client: The client to use.

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :rtype: str
    :returns: The download URL for the current blob.
    """
    query_pairs = []

    if self.media_link is not None:
        base_url = self.media_link
    else:
        # No media link loaded: assemble the URL from the blob's path.
        base_url = _DOWNLOAD_URL_TEMPLATE.format(
            hostname=_get_host_name(client._connection),
            path=self.path,
            api_version=_API_VERSION,
        )
        # The generation is only appended on this constructed-URL branch.
        if self.generation is not None:
            query_pairs.append(("generation", f"{self.generation:d}"))

    if self.user_project is not None:
        query_pairs.append(("userProject", self.user_project))

    _add_generation_match_parameters(
        query_pairs,
        if_generation_match=if_generation_match,
        if_generation_not_match=if_generation_not_match,
        if_metageneration_match=if_metageneration_match,
        if_metageneration_not_match=if_metageneration_not_match,
    )
    return _add_query_parameters(base_url, query_pairs)
def _extract_headers_from_download(self, response):
    """Extract headers from a non-chunked request's http object.

    This avoids the need to make a second request for commonly used
    headers.

    Each mapped property is overwritten with the header's value, or with
    ``None`` when the response does not carry that header. ``crc32c`` and
    ``md5Hash`` are only touched when an ``X-Goog-Hash`` header is present.

    :type response:
        :class requests.models.Response
    :param response: The server response from downloading a non-chunked file
    """
    response_headers = response.headers
    properties = self._properties

    header_to_property = (
        ("Content-Encoding", "contentEncoding"),
        ("Content-Type", _CONTENT_TYPE_FIELD),
        ("Cache-Control", "cacheControl"),
        ("X-Goog-Storage-Class", "storageClass"),
        ("Content-Language", "contentLanguage"),
        ("ETag", "etag"),
        ("X-goog-generation", "generation"),
        ("X-goog-metageneration", "metageneration"),
    )
    for header_name, property_name in header_to_property:
        properties[property_name] = response_headers.get(header_name, None)

    # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
    x_goog_hash = response_headers.get("X-Goog-Hash", "")

    if x_goog_hash:
        digests = {}
        for encoded_digest in x_goog_hash.split(","):
            # Strip surrounding whitespace first: the value may arrive as
            # "crc32c=..., md5=..." (comma followed by a space), and
            # ``re.match`` only matches at the very start of the string.
            match = re.match(
                r"(crc32c|md5)=([\w/+]+={0,3})", encoded_digest.strip()
            )
            if match:
                method, digest = match.groups()
                digests[method] = digest

        properties["crc32c"] = digests.get("crc32c", None)
        properties["md5Hash"] = digests.get("md5", None)
def _do_download(
    self,
    transport,
    file_obj,
    download_url,
    headers,
    start=None,
    end=None,
    raw_download=False,
    timeout=_DEFAULT_TIMEOUT,
    checksum="md5",
    retry=None,
):
    """Perform a download without any error handling.

    This is intended to be called by :meth:`_prep_and_do_download` so it can
    be wrapped with error handling / remapping.

    When the blob has no ``chunk_size`` the object is fetched with a single
    ``Download`` / ``RawDownload`` request and the response headers are
    copied onto the blob's properties. Otherwise the object is fetched
    chunk by chunk with ``ChunkedDownload`` / ``RawChunkedDownload``.

    :type transport:
        :class:`~google.auth.transport.requests.AuthorizedSession`
    :param transport:
        The transport (with credentials) that will make authenticated
        requests.

    :type file_obj: file
    :param file_obj: A file handle to which to write the blob's data.

    :type download_url: str
    :param download_url: The URL where the media can be accessed.

    :type headers: dict
    :param headers: Headers to be sent with the request(s).

    :type start: int
    :param start: (Optional) The first byte in a range to be downloaded.

    :type end: int
    :param end: (Optional) The last byte in a range to be downloaded.

    :type raw_download: bool
    :param raw_download:
        (Optional) If true, download the object without any expansion.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify the integrity
        of the object. The response headers must contain a checksum of the
        requested type. If the headers lack an appropriate checksum (for
        instance in the case of transcoded or ranged downloads where the
        remote service does not know the correct checksum, including
        downloads where chunk_size is set) an INFO-level log will be
        emitted. Supported values are "md5", "crc32c" and None. The default
        is "md5".

    :type retry: google.api_core.retry.Retry
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will configure backoff and timeout options. Custom
        predicates (customizable error codes) are not supported for media
        operations such as this one.

        This private method does not accept ConditionalRetryPolicy values
        because the information necessary to evaluate the policy is instead
        evaluated in blob._prep_and_do_download().

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.
    """
    retry_strategy = _api_core_retry_to_resumable_media_retry(retry)

    if self.chunk_size is not None:
        # Chunked path: the checksum is not passed to the download object,
        # so a requested checksum is only reported via an INFO log.
        if checksum:
            _logger.info(_CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE.format(checksum))

        chunked_cls = RawChunkedDownload if raw_download else ChunkedDownload
        chunked = chunked_cls(
            download_url,
            self.chunk_size,
            file_obj,
            headers=headers,
            start=start or 0,
            end=end,
        )
        chunked._retry_strategy = retry_strategy
        while not chunked.finished:
            chunked.consume_next_chunk(transport, timeout=timeout)
        return

    # Single-request path.
    single_cls = RawDownload if raw_download else Download
    single = single_cls(
        download_url,
        stream=file_obj,
        headers=headers,
        start=start,
        end=end,
        checksum=checksum,
    )
    single._retry_strategy = retry_strategy
    response = single.consume(transport, timeout=timeout)
    self._extract_headers_from_download(response)
def download_to_file(
    self,
    file_obj,
    client=None,
    start=None,
    end=None,
    raw_download=False,
    if_etag_match=None,
    if_etag_not_match=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum="md5",
    retry=DEFAULT_RETRY,
):
    """Write the contents of this blob to a file-like object.

    .. note::

       If the server-set property, :attr:`media_link`, is not yet
       initialized, makes an additional API request to load it.

    When the blob's :attr:`chunk_size` is `None`, the data is fetched with
    a single download request; otherwise each request fetches
    :attr:`chunk_size` bytes of data.

    For more fine-grained control over the download process, check out
    [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
    For example, this library allows downloading **parts** of a blob rather than the whole thing.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type file_obj: file
    :param file_obj: A file handle to which to write the blob's data.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type start: int
    :param start: (Optional) The first byte in a range to be downloaded.

    :type end: int
    :param end: (Optional) The last byte in a range to be downloaded.

    :type raw_download: bool
    :param raw_download:
        (Optional) If true, download the object without any expansion.

    :type if_etag_match: Union[str, Set[str]]
    :param if_etag_match:
        (Optional) See :ref:`using-if-etag-match`

    :type if_etag_not_match: Union[str, Set[str]]
    :param if_etag_not_match:
        (Optional) See :ref:`using-if-etag-not-match`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify the integrity
        of the object. The response headers must contain a checksum of the
        requested type. If the headers lack an appropriate checksum (for
        instance in the case of transcoded or ranged downloads where the
        remote service does not know the correct checksum, including
        downloads where chunk_size is set) an INFO-level log will be
        emitted. Supported values are "md5", "crc32c" and None. The default
        is "md5".

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_metageneration_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :raises: :class:`google.cloud.exceptions.NotFound`
    """
    # Every option is forwarded unchanged; the shared helper does the work.
    download_options = {
        "client": client,
        "start": start,
        "end": end,
        "raw_download": raw_download,
        "if_etag_match": if_etag_match,
        "if_etag_not_match": if_etag_not_match,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "timeout": timeout,
        "checksum": checksum,
        "retry": retry,
    }
    self._prep_and_do_download(file_obj, **download_options)
def _handle_filename_and_download(self, filename, *args, **kwargs):
    """Download the contents of this blob into a named file.

    The file is opened for binary writing and handed to
    :meth:`_prep_and_do_download`. If the download fails because the data
    is corrupt or because the blob does not exist, the file that was just
    created is deleted before the exception is re-raised, so no corrupt or
    empty file is left behind. Any other exception propagates and leaves
    the file in place.

    After a successful download, if :attr:`updated` is set, the file's
    access and modification times are set to that timestamp.

    :type filename: str
    :param filename: A filename to be passed to ``open``.

    For *args and **kwargs, refer to the documentation for download_to_filename() for more information.

    :raises: :class:`google.resumable_media.DataCorruption` or
        :class:`google.cloud.exceptions.NotFound`, re-raised after the
        file has been removed.
    """
    try:
        with open(filename, "wb") as file_obj:
            self._prep_and_do_download(
                file_obj,
                *args,
                **kwargs,
            )
    except (resumable_media.DataCorruption, exceptions.NotFound):
        # ``open`` has already created the file, so a missing blob would
        # otherwise leave an empty file on disk, and a corrupt download
        # would leave bad data. Remove it in both cases.
        os.remove(filename)
        raise

    updated = self.updated
    if updated is not None:
        # Mirror the blob's server-side update time onto the local file.
        mtime = updated.timestamp()
        os.utime(filename, (mtime, mtime))
def download_to_filename(
    self,
    filename,
    client=None,
    start=None,
    end=None,
    raw_download=False,
    if_etag_match=None,
    if_etag_not_match=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum="md5",
    retry=DEFAULT_RETRY,
):
    """Write the contents of this blob to the file at ``filename``.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    See a [code sample](https://cloud.google.com/storage/docs/samples/storage-download-encrypted-file#storage_download_encrypted_file-python)
    to download a file with a [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).

    :type filename: str
    :param filename: A filename to be passed to ``open``.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type start: int
    :param start: (Optional) The first byte in a range to be downloaded.

    :type end: int
    :param end: (Optional) The last byte in a range to be downloaded.

    :type raw_download: bool
    :param raw_download:
        (Optional) If true, download the object without any expansion.

    :type if_etag_match: Union[str, Set[str]]
    :param if_etag_match:
        (Optional) See :ref:`using-if-etag-match`

    :type if_etag_not_match: Union[str, Set[str]]
    :param if_etag_not_match:
        (Optional) See :ref:`using-if-etag-not-match`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify the integrity
        of the object. The response headers must contain a checksum of the
        requested type. If the headers lack an appropriate checksum (for
        instance in the case of transcoded or ranged downloads where the
        remote service does not know the correct checksum, including
        downloads where chunk_size is set) an INFO-level log will be
        emitted. Supported values are "md5", "crc32c" and None. The default
        is "md5".

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_metageneration_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :raises: :class:`google.cloud.exceptions.NotFound`
    """
    # File handling (open, cleanup, mtime) lives in the private helper;
    # this public entry point only forwards the caller's options.
    download_options = {
        "client": client,
        "start": start,
        "end": end,
        "raw_download": raw_download,
        "if_etag_match": if_etag_match,
        "if_etag_not_match": if_etag_not_match,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "timeout": timeout,
        "checksum": checksum,
        "retry": retry,
    }
    self._handle_filename_and_download(filename, **download_options)
def download_as_bytes(
    self,
    client=None,
    start=None,
    end=None,
    raw_download=False,
    if_etag_match=None,
    if_etag_not_match=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum="md5",
    retry=DEFAULT_RETRY,
):
    """Fetch the contents of this blob and return them as a bytes object.

    The data is downloaded into an in-memory buffer whose full contents
    are returned.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type start: int
    :param start: (Optional) The first byte in a range to be downloaded.

    :type end: int
    :param end: (Optional) The last byte in a range to be downloaded.

    :type raw_download: bool
    :param raw_download:
        (Optional) If true, download the object without any expansion.

    :type if_etag_match: Union[str, Set[str]]
    :param if_etag_match:
        (Optional) See :ref:`using-if-etag-match`

    :type if_etag_not_match: Union[str, Set[str]]
    :param if_etag_not_match:
        (Optional) See :ref:`using-if-etag-not-match`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify the integrity
        of the object. The response headers must contain a checksum of the
        requested type. If the headers lack an appropriate checksum (for
        instance in the case of transcoded or ranged downloads where the
        remote service does not know the correct checksum, including
        downloads where chunk_size is set) an INFO-level log will be
        emitted. Supported values are "md5", "crc32c" and None. The default
        is "md5".

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_metageneration_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :rtype: bytes
    :returns: The data stored in this blob.

    :raises: :class:`google.cloud.exceptions.NotFound`
    """
    download_options = {
        "client": client,
        "start": start,
        "end": end,
        "raw_download": raw_download,
        "if_etag_match": if_etag_match,
        "if_etag_not_match": if_etag_not_match,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "timeout": timeout,
        "checksum": checksum,
        "retry": retry,
    }

    # Collect the payload in memory, then hand back everything written.
    buffer = BytesIO()
    self._prep_and_do_download(buffer, **download_options)
    return buffer.getvalue()
def download_as_string(
    self,
    client=None,
    start=None,
    end=None,
    raw_download=False,
    if_etag_match=None,
    if_etag_not_match=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    retry=DEFAULT_RETRY,
):
    """(Deprecated) Download the contents of this blob as a bytes object.

    Emits a :class:`PendingDeprecationWarning` and then delegates to
    :meth:`download_as_bytes`. No ``checksum`` argument is forwarded, so
    that method's default checksum applies.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    .. note::
       Deprecated alias for :meth:`download_as_bytes`.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type start: int
    :param start: (Optional) The first byte in a range to be downloaded.

    :type end: int
    :param end: (Optional) The last byte in a range to be downloaded.

    :type raw_download: bool
    :param raw_download:
        (Optional) If true, download the object without any expansion.

    :type if_etag_match: Union[str, Set[str]]
    :param if_etag_match:
        (Optional) See :ref:`using-if-etag-match`

    :type if_etag_not_match: Union[str, Set[str]]
    :param if_etag_not_match:
        (Optional) See :ref:`using-if-etag-not-match`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_metageneration_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :rtype: bytes
    :returns: The data stored in this blob.

    :raises: :class:`google.cloud.exceptions.NotFound`
    """
    # stacklevel=2 attributes the warning to the caller of this method.
    warnings.warn(
        _DOWNLOAD_AS_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2
    )

    download_options = {
        "client": client,
        "start": start,
        "end": end,
        "raw_download": raw_download,
        "if_etag_match": if_etag_match,
        "if_etag_not_match": if_etag_not_match,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "timeout": timeout,
        "retry": retry,
    }
    return self.download_as_bytes(**download_options)
def download_as_text(
    self,
    client=None,
    start=None,
    end=None,
    raw_download=False,
    encoding=None,
    if_etag_match=None,
    if_etag_not_match=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    retry=DEFAULT_RETRY,
):
    """Download the contents of this blob as text (*not* bytes).

    The bytes are fetched with :meth:`download_as_bytes` and then decoded.
    The codec is chosen in this order: the ``encoding`` argument, the
    ``charset`` parameter of :attr:`content_type`, and finally "utf-8".

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type start: int
    :param start: (Optional) The first byte in a range to be downloaded.

    :type end: int
    :param end: (Optional) The last byte in a range to be downloaded.

    :type raw_download: bool
    :param raw_download:
        (Optional) If true, download the object without any expansion.

    :type encoding: str
    :param encoding: (Optional) encoding to be used to decode the
        downloaded bytes.  Defaults to the ``charset`` param of
        :attr:`content_type`, or else to "utf-8".

    :type if_etag_match: Union[str, Set[str]]
    :param if_etag_match:
        (Optional) See :ref:`using-if-etag-match`

    :type if_etag_not_match: Union[str, Set[str]]
    :param if_etag_not_match:
        (Optional) See :ref:`using-if-etag-not-match`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_metageneration_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :rtype: text
    :returns: The data stored in this blob, decoded to text.
    """
    download_options = {
        "client": client,
        "start": start,
        "end": end,
        "raw_download": raw_download,
        "if_etag_match": if_etag_match,
        "if_etag_not_match": if_etag_not_match,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "timeout": timeout,
        "retry": retry,
    }
    payload = self.download_as_bytes(**download_options)

    # An explicit encoding wins; otherwise look for a charset parameter on
    # the blob's content type by parsing it as a Content-Type header.
    charset = encoding
    if charset is None and self.content_type is not None:
        header = HeaderParser().parsestr("Content-Type: " + self.content_type)
        # The first entry is the media type itself; the rest are parameters.
        charset = dict(header.get_params()[1:]).get("charset")

    if charset is None:
        charset = "utf-8"
    return payload.decode(charset)
def _get_content_type(self, content_type, filename=None):
    """Work out which content type to use for the current object.

    The first of these that is not :data:`None` is returned:

    - The value passed in to this method
    - The value stored on the current blob
    - A guess based on ``filename`` (only when ``filename`` is given)
    - The default value ('application/octet-stream')

    :type content_type: str
    :param content_type: (Optional) Type of content.

    :type filename: str
    :param filename:
        (Optional) The name of the file where the content is stored.

    :rtype: str
    :returns: Type of content gathered from the object.
    """
    if content_type is not None:
        return content_type

    stored_type = self.content_type
    if stored_type is not None:
        return stored_type

    if filename is not None:
        # guess_type returns (type, encoding); only the type matters here.
        guessed_type = mimetypes.guess_type(filename)[0]
        if guessed_type is not None:
            return guessed_type

    return _DEFAULT_CONTENT_TYPE
def _get_writable_metadata(self):
    """Collect the writable object / blob metadata that has been changed.

    This is intended to be used when creating a new object / blob. The
    result always carries the blob's ``name``, plus every changed property
    whose key is one of the writable fields.

    See the [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects)
    for more information, the fields marked as writable are:

    * ``acl``
    * ``cacheControl``
    * ``contentDisposition``
    * ``contentEncoding``
    * ``contentLanguage``
    * ``contentType``
    * ``crc32c``
    * ``customTime``
    * ``md5Hash``
    * ``metadata``
    * ``name``
    * ``storageClass``

    For now, we don't support ``acl``, access control lists should be
    managed directly through :class:`ObjectACL` methods.

    :rtype: dict
    :returns: The writable metadata to send with the new object.
    """
    # NOTE: This assumes `self.name` is unicode.
    writable = {"name": self.name}
    writable.update(
        {
            field: self._properties[field]
            for field in self._changes
            if field in _WRITABLE_FIELDS
        }
    )
    return writable
def _get_upload_arguments(self, client, content_type, filename=None, command=None):
    """Assemble the arguments needed to perform an upload.

    The content type is resolved by :meth:`_get_content_type`, in order of
    precedence:

    - The value passed in to this method (if not :data:`None`)
    - The value stored on the current blob
    - The default value ('application/octet-stream')

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        The client whose connection supplies the user agent for the
        default headers.

    :type content_type: str
    :param content_type: Type of content being uploaded (or :data:`None`).

    :type filename: str
    :param filename:
        (Optional) Forwarded to :meth:`_get_content_type` when resolving
        the content type.

    :type command: str
    :param command:
        (Optional) Information about which interface for upload was used,
        to be included in the X-Goog-API-Client header. Please leave as None
        unless otherwise directed.

    :rtype: tuple
    :returns: A triple of

              * A header dictionary
              * An object metadata dictionary
              * The ``content_type`` as a string (according to precedence)
    """
    resolved_type = self._get_content_type(content_type, filename=filename)

    # Default headers first; encryption headers are layered on top and
    # win on any key they share.
    upload_headers = dict(
        _get_default_headers(
            client._connection.user_agent, resolved_type, command=command
        )
    )
    upload_headers.update(_get_encryption_headers(self._encryption_key))

    return upload_headers, self._get_writable_metadata(), resolved_type
1751 def _do_multipart_upload(
1752 self,
1753 client,
1754 stream,
1755 content_type,
1756 size,
1757 num_retries,
1758 predefined_acl,
1759 if_generation_match,
1760 if_generation_not_match,
1761 if_metageneration_match,
1762 if_metageneration_not_match,
1763 timeout=_DEFAULT_TIMEOUT,
1764 checksum=None,
1765 retry=None,
1766 command=None,
1767 ):
1768 """Perform a multipart upload.
1770 The content type of the upload will be determined in order
1771 of precedence:
1773 - The value passed in to this method (if not :data:`None`)
1774 - The value stored on the current blob
1775 - The default value ('application/octet-stream')
1777 :type client: :class:`~google.cloud.storage.client.Client`
1778 :param client:
1779 (Optional) The client to use. If not passed, falls back to the
1780 ``client`` stored on the blob's bucket.
1782 :type stream: IO[bytes]
1783 :param stream: A bytes IO object open for reading.
1785 :type content_type: str
1786 :param content_type: Type of content being uploaded (or :data:`None`).
1788 :type size: int
1789 :param size:
1790 The number of bytes to be uploaded (which will be read from
1791 ``stream``). If not provided, the upload will be concluded once
1792 ``stream`` is exhausted (or :data:`None`).
1794 :type num_retries: int
1795 :param num_retries:
1796 Number of upload retries. By default, only uploads with
1797 if_generation_match set will be retried, as uploads without the
1798 argument are not guaranteed to be idempotent. Setting num_retries
1799 will override this default behavior and guarantee retries even when
1800 if_generation_match is not set. (Deprecated: This argument
1801 will be removed in a future release.)
1803 :type predefined_acl: str
1804 :param predefined_acl: (Optional) Predefined access control list
1806 :type if_generation_match: long
1807 :param if_generation_match:
1808 (Optional) See :ref:`using-if-generation-match`
1810 :type if_generation_not_match: long
1811 :param if_generation_not_match:
1812 (Optional) See :ref:`using-if-generation-not-match`
1814 :type if_metageneration_match: long
1815 :param if_metageneration_match:
1816 (Optional) See :ref:`using-if-metageneration-match`
1818 :type if_metageneration_not_match: long
1819 :param if_metageneration_not_match:
1820 (Optional) See :ref:`using-if-metageneration-not-match`
1822 :type timeout: float or tuple
1823 :param timeout:
1824 (Optional) The amount of time, in seconds, to wait
1825 for the server response. See: :ref:`configuring_timeouts`
1827 :type checksum: str
1828 :param checksum:
1829 (Optional) The type of checksum to compute to verify
1830 the integrity of the object. The request metadata will be amended
1831 to include the computed value. Using this option will override a
1832 manually-set checksum value. Supported values are "md5",
1833 "crc32c" and None. The default is None.
1835 :type retry: google.api_core.retry.Retry
1836 :param retry: (Optional) How to retry the RPC. A None value will disable
1837 retries. A google.api_core.retry.Retry value will enable retries,
1838 and the object will configure backoff and timeout options. Custom
1839 predicates (customizable error codes) are not supported for media
1840 operations such as this one.
1842 This private method does not accept ConditionalRetryPolicy values
1843 because the information necessary to evaluate the policy is instead
1844 evaluated in blob._do_upload().
1846 See the retry.py source code and docstrings in this package
1847 (google.cloud.storage.retry) for information on retry types and how
1848 to configure them.
1850 :type command: str
1851 :param command:
1852 (Optional) Information about which interface for upload was used,
1853 to be included in the X-Goog-API-Client header. Please leave as None
1854 unless otherwise directed.
1856 :rtype: :class:`~requests.Response`
1857 :returns: The "200 OK" response object returned after the multipart
1858 upload request.
1859 :raises: :exc:`ValueError` if ``size`` is not :data:`None` but the
1860 ``stream`` has fewer than ``size`` bytes remaining.
1861 """
1862 if size is None:
1863 data = stream.read()
1864 else:
1865 data = stream.read(size)
1866 if len(data) < size:
1867 msg = _READ_LESS_THAN_SIZE.format(size, len(data))
1868 raise ValueError(msg)
1870 client = self._require_client(client)
1871 transport = self._get_transport(client)
1872 if "metadata" in self._properties and "metadata" not in self._changes:
1873 self._changes.add("metadata")
1874 info = self._get_upload_arguments(client, content_type, command=command)
1875 headers, object_metadata, content_type = info
1877 hostname = _get_host_name(client._connection)
1878 base_url = _MULTIPART_URL_TEMPLATE.format(
1879 hostname=hostname, bucket_path=self.bucket.path, api_version=_API_VERSION
1880 )
1881 name_value_pairs = []
1883 if self.user_project is not None:
1884 name_value_pairs.append(("userProject", self.user_project))
1886 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
1887 # at rest, object resource metadata will store the version of the Key Management
1888 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
1889 # used to upload a new version of the object then the existing kmsKeyName version
1890 # value can't be used in the upload request and the client instead ignores it.
1891 if (
1892 self.kms_key_name is not None
1893 and "cryptoKeyVersions" not in self.kms_key_name
1894 ):
1895 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
1897 if predefined_acl is not None:
1898 name_value_pairs.append(("predefinedAcl", predefined_acl))
1900 if if_generation_match is not None:
1901 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
1903 if if_generation_not_match is not None:
1904 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
1906 if if_metageneration_match is not None:
1907 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
1909 if if_metageneration_not_match is not None:
1910 name_value_pairs.append(
1911 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
1912 )
1914 upload_url = _add_query_parameters(base_url, name_value_pairs)
1915 upload = MultipartUpload(upload_url, headers=headers, checksum=checksum)
1917 upload._retry_strategy = _api_core_retry_to_resumable_media_retry(
1918 retry, num_retries
1919 )
1921 response = upload.transmit(
1922 transport, data, object_metadata, content_type, timeout=timeout
1923 )
1925 return response
def _initiate_resumable_upload(
    self,
    client,
    stream,
    content_type,
    size,
    num_retries,
    predefined_acl=None,
    extra_headers=None,
    chunk_size=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum=None,
    retry=None,
    command=None,
):
    """Initiate a resumable upload.

    Builds the resumable-upload URL with its query parameters, creates a
    :class:`~google.resumable_media.requests.ResumableUpload` and calls its
    ``initiate()`` method. The caller is expected to transmit the data
    afterwards through the returned upload object.

    The content type of the upload will be determined in order
    of precedence:

    - The value passed in to this method (if not :data:`None`)
    - The value stored on the current blob
    - The default value ('application/octet-stream')

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type stream: IO[bytes]
    :param stream: A bytes IO object open for reading.

    :type content_type: str
    :param content_type: Type of content being uploaded (or :data:`None`).

    :type size: int
    :param size:
        The number of bytes to be uploaded (which will be read from
        ``stream``). If not provided (:data:`None`), the upload will be
        concluded once ``stream`` is exhausted.

    :type num_retries: int
    :param num_retries:
        Number of upload retries. By default, only uploads with
        if_generation_match set will be retried, as uploads without the
        argument are not guaranteed to be idempotent. Setting num_retries
        will override this default behavior and guarantee retries even when
        if_generation_match is not set. (Deprecated: This argument
        will be removed in a future release.)

    :type predefined_acl: str
    :param predefined_acl: (Optional) Predefined access control list

    :type extra_headers: dict
    :param extra_headers:
        (Optional) Extra headers to add to standard headers. Applied after
        the standard headers, so matching keys are overwritten.

    :type chunk_size: int
    :param chunk_size:
        (Optional) Chunk size to use when creating a
        :class:`~google.resumable_media.requests.ResumableUpload`.
        If not passed, falls back to the chunk size on the current blob;
        if that is also :data:`None`, the default value is used.
        The default value of ``chunk_size`` is 100 MB.

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify
        the integrity of the object. After the upload is complete, the
        server-computed checksum of the resulting object will be checked
        and google.resumable_media.common.DataCorruption will be raised on
        a mismatch. On a validation failure, the client will attempt to
        delete the uploaded object automatically. Supported values
        are "md5", "crc32c" and None. The default is None.

    :type retry: google.api_core.retry.Retry
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will configure backoff and timeout options. Custom
        predicates (customizable error codes) are not supported for media
        operations such as this one.

        This private method does not accept ConditionalRetryPolicy values
        because the information necessary to evaluate the policy is instead
        evaluated in blob._do_upload().

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

    :type command: str
    :param command:
        (Optional) Information about which interface for upload was used,
        to be included in the X-Goog-API-Client header. Please leave as None
        unless otherwise directed.

    :rtype: tuple
    :returns:
        Pair of

        * The :class:`~google.resumable_media.requests.ResumableUpload`
          that was created
        * The ``transport`` used to initiate the upload.
    """
    client = self._require_client(client)

    # Chunk size precedence: explicit argument, then the blob's own
    # setting, then the module default.
    if chunk_size is None:
        chunk_size = self.chunk_size
    if chunk_size is None:
        chunk_size = _DEFAULT_CHUNKSIZE

    transport = self._get_transport(client)

    # Flag existing custom metadata as changed before building the upload
    # arguments (presumably so it is sent with the new object -- confirm
    # against ``_get_upload_arguments``).
    if "metadata" in self._properties and "metadata" not in self._changes:
        self._changes.add("metadata")
    headers, object_metadata, content_type = self._get_upload_arguments(
        client, content_type, command=command
    )
    if extra_headers is not None:
        headers.update(extra_headers)

    hostname = _get_host_name(client._connection)
    base_url = _RESUMABLE_URL_TEMPLATE.format(
        hostname=hostname, bucket_path=self.bucket.path, api_version=_API_VERSION
    )
    name_value_pairs = []

    if self.user_project is not None:
        name_value_pairs.append(("userProject", self.user_project))

    # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
    # at rest, object resource metadata will store the version of the Key Management
    # Service cryptographic material. If a Blob instance with KMS Key metadata set is
    # used to upload a new version of the object then the existing kmsKeyName version
    # value can't be used in the upload request and the client instead ignores it.
    if (
        self.kms_key_name is not None
        and "cryptoKeyVersions" not in self.kms_key_name
    ):
        name_value_pairs.append(("kmsKeyName", self.kms_key_name))

    if predefined_acl is not None:
        name_value_pairs.append(("predefinedAcl", predefined_acl))

    # Precondition parameters use the exact JSON API spelling; note the
    # lower-case "g" in both ``ifMetageneration*`` names.
    preconditions = (
        ("ifGenerationMatch", if_generation_match),
        ("ifGenerationNotMatch", if_generation_not_match),
        ("ifMetagenerationMatch", if_metageneration_match),
        ("ifMetagenerationNotMatch", if_metageneration_not_match),
    )
    name_value_pairs.extend(
        (name, value) for name, value in preconditions if value is not None
    )

    upload_url = _add_query_parameters(base_url, name_value_pairs)
    upload = ResumableUpload(
        upload_url, chunk_size, headers=headers, checksum=checksum
    )

    upload._retry_strategy = _api_core_retry_to_resumable_media_retry(
        retry, num_retries
    )

    upload.initiate(
        transport,
        stream,
        object_metadata,
        content_type,
        total_bytes=size,
        stream_final=False,
        timeout=timeout,
    )

    return upload, transport
def _do_resumable_upload(
    self,
    client,
    stream,
    content_type,
    size,
    num_retries,
    predefined_acl,
    if_generation_match,
    if_generation_not_match,
    if_metageneration_match,
    if_metageneration_not_match,
    timeout=_DEFAULT_TIMEOUT,
    checksum=None,
    retry=None,
    command=None,
):
    """Upload ``stream`` chunk by chunk through a resumable session.

    The session is opened with :meth:`_initiate_resumable_upload` and then
    chunks are transmitted until the upload object reports it is finished.

    Assumes ``chunk_size`` is not :data:`None` on the current blob.
    The default value of ``chunk_size`` is 100 MB.

    The content type of the upload will be determined in order
    of precedence:

    - The value passed in to this method (if not :data:`None`)
    - The value stored on the current blob
    - The default value ('application/octet-stream')

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type stream: IO[bytes]
    :param stream: A bytes IO object open for reading.

    :type content_type: str
    :param content_type: Type of content being uploaded (or :data:`None`).

    :type size: int
    :param size:
        The number of bytes to be uploaded (which will be read from
        ``stream``). If not provided (:data:`None`), the upload will be
        concluded once ``stream`` is exhausted.

    :type num_retries: int
    :param num_retries:
        Number of upload retries. By default, only uploads with
        if_generation_match set will be retried, as uploads without the
        argument are not guaranteed to be idempotent. Setting num_retries
        will override this default behavior and guarantee retries even when
        if_generation_match is not set. (Deprecated: This argument
        will be removed in a future release.)

    :type predefined_acl: str
    :param predefined_acl: (Optional) Predefined access control list

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`.
        Used both for the initiating request and for every chunk.

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify
        the integrity of the object. After the upload is complete, the
        server-computed checksum of the resulting object will be checked
        and google.resumable_media.common.DataCorruption will be raised on
        a mismatch. On a validation failure, the client will attempt to
        delete the uploaded object automatically. Supported values
        are "md5", "crc32c" and None. The default is None.

    :type retry: google.api_core.retry.Retry
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will configure backoff and timeout options. Custom
        predicates (customizable error codes) are not supported for media
        operations such as this one.

        This private method does not accept ConditionalRetryPolicy values
        because the information necessary to evaluate the policy is instead
        evaluated in blob._do_upload().

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

    :type command: str
    :param command:
        (Optional) Information about which interface for upload was used,
        to be included in the X-Goog-API-Client header. Please leave as None
        unless otherwise directed.

    :rtype: :class:`~requests.Response`
    :returns: The "200 OK" response object returned after the final chunk
              is uploaded.
    """
    # Everything beyond the five positional arguments is forwarded by
    # keyword to the session-initiating helper.
    session_options = {
        "predefined_acl": predefined_acl,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "timeout": timeout,
        "checksum": checksum,
        "retry": retry,
        "command": command,
    }
    session, transport = self._initiate_resumable_upload(
        client, stream, content_type, size, num_retries, **session_options
    )

    while not session.finished:
        try:
            response = session.transmit_next_chunk(transport, timeout=timeout)
        except resumable_media.DataCorruption:
            # Checksum validation failed: try to remove the corrupted
            # object, then let the original error propagate.
            self.delete()
            raise

    # The response of the last transmitted chunk is the final one.
    return response
def _do_upload(
    self,
    client,
    stream,
    content_type,
    size,
    num_retries,
    predefined_acl,
    if_generation_match,
    if_generation_not_match,
    if_metageneration_match,
    if_metageneration_not_match,
    timeout=_DEFAULT_TIMEOUT,
    checksum=None,
    retry=None,
    command=None,
):
    """Pick an upload strategy, run it and return the parsed response.

    When ``size`` is known and does not exceed ``_MAX_MULTIPART_SIZE``
    (8 MB) the content and the metadata are uploaded in a single multipart
    request. Otherwise, including when ``size`` is :data:`None`, a
    resumable media request is used.

    The content type of the upload will be determined in order
    of precedence:

    - The value passed in to this method (if not :data:`None`)
    - The value stored on the current blob
    - The default value ('application/octet-stream')

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type stream: IO[bytes]
    :param stream: A bytes IO object open for reading.

    :type content_type: str
    :param content_type: Type of content being uploaded (or :data:`None`).

    :type size: int
    :param size:
        The number of bytes to be uploaded (which will be read from
        ``stream``). If not provided (:data:`None`), the upload will be
        concluded once ``stream`` is exhausted.

    :type num_retries: int
    :param num_retries:
        Number of upload retries. By default, only uploads with
        if_generation_match set will be retried, as uploads without the
        argument are not guaranteed to be idempotent. Setting num_retries
        will override this default behavior and guarantee retries even when
        if_generation_match is not set. (Deprecated: This argument
        will be removed in a future release.)

    :type predefined_acl: str
    :param predefined_acl: (Optional) Predefined access control list

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify
        the integrity of the object. If the upload is completed in a single
        request, the checksum will be entirely precomputed and the remote
        server will handle verification and error handling. If the upload
        is too large and must be transmitted in multiple requests, the
        checksum will be incrementally computed and the client will handle
        verification and error handling, raising
        google.resumable_media.common.DataCorruption on a mismatch and
        attempting to delete the corrupted file. Supported values are
        "md5", "crc32c" and None. The default is None.

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_generation_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :type command: str
    :param command:
        (Optional) Information about which interface for upload was used,
        to be included in the X-Goog-API-Client header. Please leave as None
        unless otherwise directed.

    :rtype: dict
    :returns: The parsed JSON from the "200 OK" response. This will be the
              **only** response in the multipart case and it will be the
              **final** response in the resumable case.
    """
    if isinstance(retry, ConditionalRetryPolicy):
        # Conditional retries were designed for non-media calls, which turn
        # their arguments into a query_params dictionary. Media operations
        # do not, so a stand-in dictionary is built here purely to let the
        # policy evaluate its conditions.
        retry = retry.get_retry_policy_if_conditions_met(
            query_params={
                "ifGenerationMatch": if_generation_match,
                "ifMetagenerationMatch": if_metageneration_match,
            }
        )

    # Both strategies share one signature, so choose the bound method and
    # call it through a single site.
    fits_in_one_request = size is not None and size <= _MAX_MULTIPART_SIZE
    if fits_in_one_request:
        perform_upload = self._do_multipart_upload
    else:
        perform_upload = self._do_resumable_upload

    response = perform_upload(
        client,
        stream,
        content_type,
        size,
        num_retries,
        predefined_acl,
        if_generation_match,
        if_generation_not_match,
        if_metageneration_match,
        if_metageneration_not_match,
        timeout=timeout,
        checksum=checksum,
        retry=retry,
        command=command,
    )

    return response.json()
def _prep_and_do_upload(
    self,
    file_obj,
    rewind=False,
    size=None,
    content_type=None,
    num_retries=None,
    client=None,
    predefined_acl=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum=None,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
    command=None,
):
    """Upload the contents of this blob from a file-like object.

    Handles the deprecated ``num_retries`` argument, optionally rewinds
    ``file_obj``, validates ``predefined_acl``, performs the upload and
    stores the returned resource on this blob.

    The content type of the upload will be determined in order
    of precedence:

    - The value passed in to this method (if not :data:`None`)
    - The value stored on the current blob
    - The default value ('application/octet-stream')

    .. note::
       The effect of uploading to an existing blob depends on the
       "versioning" and "lifecycle" policies defined on the blob's
       bucket.  In the absence of those policies, upload will
       overwrite any existing contents.

       See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
       and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
       API documents for details.

    If the size of the data to be uploaded exceeds 8 MB a resumable media
    request will be used, otherwise the content and the metadata will be
    uploaded in a single multipart upload request.

    For more fine-grained control over the upload process, check out
    [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type file_obj: file
    :param file_obj: A file handle opened in binary mode for reading.

    :type rewind: bool
    :param rewind:
        If True, seek to the beginning of the file handle before writing
        the file to Cloud Storage.

    :type size: int
    :param size:
        The number of bytes to be uploaded (which will be read from
        ``file_obj``). If not provided, the upload will be concluded once
        ``file_obj`` is exhausted.

    :type content_type: str
    :param content_type: (Optional) Type of content being uploaded.

    :type num_retries: int
    :param num_retries:
        Number of upload retries. By default, only uploads with
        if_generation_match set will be retried, as uploads without the
        argument are not guaranteed to be idempotent. Setting num_retries
        will override this default behavior and guarantee retries even when
        if_generation_match is not set. (Deprecated: This argument
        will be removed in a future release.)

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use.  If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type predefined_acl: str
    :param predefined_acl: (Optional) Predefined access control list

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify
        the integrity of the object. If the upload is completed in a single
        request, the checksum will be entirely precomputed and the remote
        server will handle verification and error handling. If the upload
        is too large and must be transmitted in multiple requests, the
        checksum will be incrementally computed and the client will handle
        verification and error handling, raising
        google.resumable_media.common.DataCorruption on a mismatch and
        attempting to delete the corrupted file. Supported values are
        "md5", "crc32c" and None. The default is None.

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_generation_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :type command: str
    :param command:
        (Optional) Information about which interface for upload was used,
        to be included in the X-Goog-API-Client header. Please leave as None
        unless otherwise directed.

    :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
             if the upload response returns an error status.
    """
    if num_retries is not None:
        warnings.warn(_NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2)
        # ``num_retries`` and ``retry`` are mutually exclusive. When the
        # caller set ``num_retries`` and left ``retry`` at exactly its
        # default, drop ``retry`` for backwards compatibility.
        if retry is DEFAULT_RETRY_IF_GENERATION_SPECIFIED:
            retry = None

    _maybe_rewind(file_obj, rewind=rewind)
    predefined_acl = ACL.validate_predefined(predefined_acl)

    upload_options = {
        "timeout": timeout,
        "checksum": checksum,
        "retry": retry,
        "command": command,
    }

    try:
        resource = self._do_upload(
            client,
            file_obj,
            content_type,
            size,
            num_retries,
            predefined_acl,
            if_generation_match,
            if_generation_not_match,
            if_metageneration_match,
            if_metageneration_not_match,
            **upload_options
        )
        # Store the resource returned by the upload on this blob.
        self._set_properties(resource)
    except resumable_media.InvalidResponse as invalid_response:
        _raise_from_invalid_response(invalid_response)
def upload_from_file(
    self,
    file_obj,
    rewind=False,
    size=None,
    content_type=None,
    num_retries=None,
    client=None,
    predefined_acl=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum=None,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
):
    """Upload the contents of this blob from a file-like object.

    Public entry point; all of the work is delegated to
    :meth:`_prep_and_do_upload`.

    The content type of the upload will be determined in order
    of precedence:

    - The value passed in to this method (if not :data:`None`)
    - The value stored on the current blob
    - The default value ('application/octet-stream')

    .. note::
       The effect of uploading to an existing blob depends on the
       "versioning" and "lifecycle" policies defined on the blob's
       bucket.  In the absence of those policies, upload will
       overwrite any existing contents.

       See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
       and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
       API documents for details.

    If the size of the data to be uploaded exceeds 8 MB a resumable media
    request will be used, otherwise the content and the metadata will be
    uploaded in a single multipart upload request.

    For more fine-grained control over the upload process, check out
    [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type file_obj: file
    :param file_obj: A file handle opened in binary mode for reading.

    :type rewind: bool
    :param rewind:
        If True, seek to the beginning of the file handle before writing
        the file to Cloud Storage.

    :type size: int
    :param size:
        The number of bytes to be uploaded (which will be read from
        ``file_obj``). If not provided, the upload will be concluded once
        ``file_obj`` is exhausted.

    :type content_type: str
    :param content_type: (Optional) Type of content being uploaded.

    :type num_retries: int
    :param num_retries:
        Number of upload retries. By default, only uploads with
        if_generation_match set will be retried, as uploads without the
        argument are not guaranteed to be idempotent. Setting num_retries
        will override this default behavior and guarantee retries even when
        if_generation_match is not set. (Deprecated: This argument
        will be removed in a future release.)

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use.  If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type predefined_acl: str
    :param predefined_acl: (Optional) Predefined access control list

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify
        the integrity of the object. If the upload is completed in a single
        request, the checksum will be entirely precomputed and the remote
        server will handle verification and error handling. If the upload
        is too large and must be transmitted in multiple requests, the
        checksum will be incrementally computed and the client will handle
        verification and error handling, raising
        google.resumable_media.common.DataCorruption on a mismatch and
        attempting to delete the corrupted file. Supported values are
        "md5", "crc32c" and None. The default is None.

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_generation_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
             if the upload response returns an error status.
    """
    # Every option is forwarded by keyword; ``command`` is deliberately
    # not passed and keeps the helper's default.
    forwarded = {
        "rewind": rewind,
        "size": size,
        "content_type": content_type,
        "num_retries": num_retries,
        "client": client,
        "predefined_acl": predefined_acl,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "timeout": timeout,
        "checksum": checksum,
        "retry": retry,
    }
    self._prep_and_do_upload(file_obj, **forwarded)
2778 def _handle_filename_and_upload(self, filename, content_type=None, *args, **kwargs):
2779 """Upload this blob's contents from the content of a named file.
2781 :type filename: str
2782 :param filename: The path to the file.
2784 :type content_type: str
2785 :param content_type: (Optional) Type of content being uploaded.
2787 For *args and **kwargs, refer to the documentation for upload_from_filename() for more information.
2788 """
2790 content_type = self._get_content_type(content_type, filename=filename)
2792 with open(filename, "rb") as file_obj:
2793 total_bytes = os.fstat(file_obj.fileno()).st_size
2794 self._prep_and_do_upload(
2795 file_obj,
2796 content_type=content_type,
2797 size=total_bytes,
2798 *args,
2799 **kwargs,
2800 )
def upload_from_filename(
    self,
    filename,
    content_type=None,
    num_retries=None,
    client=None,
    predefined_acl=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum=None,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
):
    """Upload the contents of a file on disk as this blob's contents.

    The upload's content type is chosen using the first of the following
    that applies:

    - The value passed in to this method (if not :data:`None`)
    - The value stored on the current blob
    - The value given by ``mimetypes.guess_type``
    - The default value ('application/octet-stream')

    .. note::
       What happens when uploading to an existing blob depends on the
       "versioning" and "lifecycle" policies defined on the blob's
       bucket. Without such policies, the upload overwrites any
       existing contents.

       See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
       and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
       API documents for details.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    See a [code sample](https://cloud.google.com/storage/docs/samples/storage-upload-encrypted-file#storage_upload_encrypted_file-python)
    to upload a file with a
    [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).

    :type filename: str
    :param filename: The path to the file.

    :type content_type: str
    :param content_type: (Optional) Type of content being uploaded.

    :type num_retries: int
    :param num_retries:
        Number of upload retries. By default, only uploads that set
        if_generation_match are retried, because uploads without that
        argument are not guaranteed to be idempotent. Passing num_retries
        overrides this default and guarantees retries even when
        if_generation_match is not set. (Deprecated: This argument
        will be removed in a future release.)

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type predefined_acl: str
    :param predefined_acl: (Optional) Predefined access control list

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify the
        integrity of the object. When the upload completes in a single
        request, the checksum is fully precomputed and the remote server
        performs verification and error handling. When the upload is too
        large and has to be sent in multiple requests, the checksum is
        computed incrementally and the client performs verification and
        error handling, raising
        google.resumable_media.common.DataCorruption on a mismatch and
        attempting to delete the corrupted file. Supported values are
        "md5", "crc32c" and None. The default is None.

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_generation_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.
    """
    # Gather every option once and forward the whole set by keyword to the
    # shared filename-handling helper.
    upload_options = {
        "content_type": content_type,
        "num_retries": num_retries,
        "client": client,
        "predefined_acl": predefined_acl,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "timeout": timeout,
        "checksum": checksum,
        "retry": retry,
    }
    self._handle_filename_and_upload(filename, **upload_options)
def upload_from_string(
    self,
    data,
    content_type="text/plain",
    num_retries=None,
    client=None,
    predefined_acl=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum=None,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
):
    """Upload the given string or bytes as the contents of this blob.

    .. note::
       What happens when uploading to an existing blob depends on the
       "versioning" and "lifecycle" policies defined on the blob's
       bucket. Without such policies, the upload overwrites any
       existing contents.

       See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
       and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
       API documents for details.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type data: bytes or str
    :param data:
        The data to store in this blob.  If the value is text, it will be
        encoded as UTF-8.

    :type content_type: str
    :param content_type:
        (Optional) Type of content being uploaded. Defaults to
        ``'text/plain'``.

    :type num_retries: int
    :param num_retries:
        Number of upload retries. By default, only uploads that set
        if_generation_match are retried, because uploads without that
        argument are not guaranteed to be idempotent. Passing num_retries
        overrides this default and guarantees retries even when
        if_generation_match is not set. (Deprecated: This argument
        will be removed in a future release.)

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type predefined_acl: str
    :param predefined_acl: (Optional) Predefined access control list

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify the
        integrity of the object. When the upload completes in a single
        request, the checksum is fully precomputed and the remote server
        performs verification and error handling. When the upload is too
        large and has to be sent in multiple requests, the checksum is
        computed incrementally and the client performs verification and
        error handling, raising
        google.resumable_media.common.DataCorruption on a mismatch and
        attempting to delete the corrupted file. Supported values are
        "md5", "crc32c" and None. The default is None.

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_generation_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.
    """
    # Normalise to bytes first so the reported size is the encoded length.
    payload = _to_bytes(data, encoding="utf-8")
    payload_stream = BytesIO(payload)

    # Delegate to the file-object upload path with an in-memory stream.
    self.upload_from_file(
        file_obj=payload_stream,
        size=len(payload),
        content_type=content_type,
        num_retries=num_retries,
        client=client,
        predefined_acl=predefined_acl,
        if_generation_match=if_generation_match,
        if_generation_not_match=if_generation_not_match,
        if_metageneration_match=if_metageneration_match,
        if_metageneration_not_match=if_metageneration_not_match,
        timeout=timeout,
        checksum=checksum,
        retry=retry,
    )
def create_resumable_upload_session(
    self,
    content_type=None,
    size=None,
    origin=None,
    client=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum=None,
    predefined_acl=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
):
    """Create a resumable upload session.

    A resumable upload session lets one client start an upload and a
    different client finish it. The initiator calls this method to set
    the metadata and limits, then hands the session URL to the client
    that will upload the binary data. That client completes the upload
    with a PUT request on the session URL. This flow allows untrusted
    clients to upload to an access-controlled bucket.

    For more details, see the
    documentation on [`signed URLs`](https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable).

    The upload's content type is chosen using the first of the following
    that applies:

    - The value passed in to this method (if not :data:`None`)
    - The value stored on the current blob
    - The default value ('application/octet-stream')

    .. note::
       What happens when uploading to an existing blob depends on the
       "versioning" and "lifecycle" policies defined on the blob's
       bucket. Without such policies, the upload overwrites any
       existing contents.

       See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
       and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
       API documents for details.

    If :attr:`encryption_key` is set, the blob will be encrypted with
    a [`customer-supplied`](https://cloud.google.com/storage/docs/encryption#customer-supplied)
    encryption key.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type size: int
    :param size:
        (Optional) The maximum number of bytes that can be uploaded using
        this session. If the size is not known when creating the session,
        this should be left blank.

    :type content_type: str
    :param content_type: (Optional) Type of content being uploaded.

    :type origin: str
    :param origin:
        (Optional) If set, the upload can only be completed by a user-agent
        that uploads from the given origin. This can be useful when passing
        the session to a web client.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify
        the integrity of the object. After the upload is complete, the
        server-computed checksum of the resulting object will be checked
        and google.resumable_media.common.DataCorruption will be raised on
        a mismatch. On a validation failure, the client will attempt to
        delete the uploaded object automatically. Supported values
        are "md5", "crc32c" and None. The default is None.

    :type predefined_acl: str
    :param predefined_acl: (Optional) Predefined access control list

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.
        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_generation_match is set.
        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.
        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :rtype: str
    :returns: The resumable upload session URL. The upload can be
              completed by making an HTTP PUT request with the
              file's contents.

    :raises: :class:`google.cloud.exceptions.GoogleCloudError`
             if the session creation response returns an error status.
    """
    # Conditional retry policies were designed for non-media calls, which
    # turn their arguments into a query_params dictionary. Media operations
    # work differently, so a stand-in query_params is built here purely to
    # let the policy decide whether its conditions are met.
    if isinstance(retry, ConditionalRetryPolicy):
        retry = retry.get_retry_policy_if_conditions_met(
            query_params={
                "ifGenerationMatch": if_generation_match,
                "ifMetagenerationMatch": if_metageneration_match,
            }
        )

    # The Origin header is specific to client-side uploads; it determines
    # the origins allowed for CORS.
    extra_headers = {} if origin is None else {"Origin": origin}

    try:
        placeholder_stream = BytesIO(b"")
        # Pass a chunk size that is **known** to be acceptable to the
        # `ResumableUpload` constructor. The chunk size only matters when
        # **sending** bytes to an upload, which does not happen here.
        session, _ = self._initiate_resumable_upload(
            client,
            placeholder_stream,
            content_type,
            size,
            None,  # presumably the deprecated num_retries slot -- confirm
            predefined_acl=predefined_acl,
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
            extra_headers=extra_headers,
            chunk_size=self._CHUNK_SIZE_MULTIPLE,
            timeout=timeout,
            checksum=checksum,
            retry=retry,
        )

        return session.resumable_url
    except resumable_media.InvalidResponse as exc:
        _raise_from_invalid_response(exc)
def get_iam_policy(
    self,
    client=None,
    requested_policy_version=None,
    timeout=_DEFAULT_TIMEOUT,
    retry=DEFAULT_RETRY,
):
    """Fetch the IAM policy attached to the object.

    .. note::

       Blob- / object-level IAM support does not yet exist and methods
       currently call an internal ACL backend not providing any utility
       beyond the blob's :attr:`acl` at this time. The API may be enhanced
       in the future and is currently undocumented. Use :attr:`acl` for
       managing object access control.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the current object's bucket.

    :type requested_policy_version: int or ``NoneType``
    :param requested_policy_version:
        (Optional) The version of IAM policies to request.  If a policy
        with a condition is requested without setting this, the server will
        return an error.  This must be set to a value of 3 to retrieve IAM
        policies containing conditions. This is to prevent client code that
        isn't aware of IAM conditions from interpreting and modifying
        policies incorrectly.  The service might return a policy with
        version lower than the one that was requested, based on the feature
        syntax in the policy fetched.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`

    :rtype: :class:`google.api_core.iam.Policy`
    :returns: the policy instance, based on the resource returned from
              the ``getIamPolicy`` API request.
    """
    client = self._require_client(client)

    # Only include the optional query parameters that were actually set.
    query_params = {}

    billing_project = self.user_project
    if billing_project is not None:
        query_params["userProject"] = billing_project

    if requested_policy_version is not None:
        query_params["optionsRequestedPolicyVersion"] = requested_policy_version

    iam_path = f"{self.path}/iam"
    policy_resource = client._get_resource(
        iam_path,
        query_params=query_params,
        timeout=timeout,
        retry=retry,
        _target_object=None,
    )
    return Policy.from_api_repr(policy_resource)
def set_iam_policy(
    self,
    policy,
    client=None,
    timeout=_DEFAULT_TIMEOUT,
    retry=DEFAULT_RETRY_IF_ETAG_IN_JSON,
):
    """Replace the IAM policy attached to the object.

    .. note::

       Blob- / object-level IAM support does not yet exist and methods
       currently call an internal ACL backend not providing any utility
       beyond the blob's :attr:`acl` at this time. The API may be enhanced
       in the future and is currently undocumented. Use :attr:`acl` for
       managing object access control.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type policy: :class:`google.api_core.iam.Policy`
    :param policy: policy instance used to update the object's IAM policy.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`

    :rtype: :class:`google.api_core.iam.Policy`
    :returns: the policy instance, based on the resource returned from
              the ``setIamPolicy`` API request.
    """
    client = self._require_client(client)

    query_params = {}

    billing_project = self.user_project
    if billing_project is not None:
        query_params["userProject"] = billing_project

    iam_path = f"{self.path}/iam"

    # Serialise the policy and tag it with this object's path before
    # sending it.
    policy_body = policy.to_api_repr()
    policy_body["resourceId"] = self.path

    updated_resource = client._put_resource(
        iam_path,
        policy_body,
        query_params=query_params,
        timeout=timeout,
        retry=retry,
        _target_object=None,
    )
    return Policy.from_api_repr(updated_resource)
def test_iam_permissions(
    self, permissions, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY
):
    """API call:  test permissions

    .. note::

       Blob- / object-level IAM support does not yet exist and methods
       currently call an internal ACL backend not providing any utility
       beyond the blob's :attr:`acl` at this time. The API may be enhanced
       in the future and is currently undocumented. Use :attr:`acl` for
       managing object access control.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type permissions: list of string
    :param permissions: the permissions to check

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`

    :rtype: list of string
    :returns: the permissions returned by the ``testIamPermissions`` API
              request.
    """
    client = self._require_client(client)

    query_params = {"permissions": permissions}
    billing_project = self.user_project
    if billing_project is not None:
        query_params["userProject"] = billing_project

    response = client._get_resource(
        f"{self.path}/iam/testPermissions",
        query_params=query_params,
        timeout=timeout,
        retry=retry,
        _target_object=None,
    )

    # A response without a "permissions" entry yields an empty list.
    return response.get("permissions", [])
def make_public(
    self,
    client=None,
    timeout=_DEFAULT_TIMEOUT,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
):
    """Grant anonymous users read access by updating the blob's ACL.

    :type client: :class:`~google.cloud.storage.client.Client` or
                  ``NoneType``
    :param client: (Optional) The client to use. If not passed, falls back
                   to the ``client`` stored on the blob's bucket.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`
    """
    # Grant read on the "all" ACL entity, then save the ACL.
    self.acl.all().grant_read()

    save_options = {
        "client": client,
        "timeout": timeout,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "retry": retry,
    }
    self.acl.save(**save_options)
def make_private(
    self,
    client=None,
    timeout=_DEFAULT_TIMEOUT,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
):
    """Revoke anonymous users' read access by updating the blob's ACL.

    :type client: :class:`~google.cloud.storage.client.Client` or
                  ``NoneType``
    :param client: (Optional) The client to use. If not passed, falls back
                   to the ``client`` stored on the blob's bucket.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`
    """
    # Revoke read on the "all" ACL entity, then save the ACL.
    self.acl.all().revoke_read()

    save_options = {
        "client": client,
        "timeout": timeout,
        "if_generation_match": if_generation_match,
        "if_generation_not_match": if_generation_not_match,
        "if_metageneration_match": if_metageneration_match,
        "if_metageneration_not_match": if_metageneration_not_match,
        "retry": retry,
    }
    self.acl.save(**save_options)
def compose(
    self,
    sources,
    client=None,
    timeout=_DEFAULT_TIMEOUT,
    if_generation_match=None,
    if_metageneration_match=None,
    if_source_generation_match=None,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
):
    """Compose the given source blobs into this blob.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/objects/compose)
    and a [code sample](https://cloud.google.com/storage/docs/samples/storage-compose-file#storage_compose_file-python).

    :type sources: list of :class:`Blob`
    :param sources: Blobs whose contents will be composed into this blob.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) Makes the operation conditional on whether the
        destination object's current generation matches the given value.
        Setting to 0 makes the operation succeed only if there are no live
        versions of the object.
        Note: In a previous version, this argument worked identically to the
        ``if_source_generation_match`` argument. For
        backwards-compatibility reasons, if a list is passed in,
        this argument will behave like ``if_source_generation_match``
        and also issue a DeprecationWarning.

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) Makes the operation conditional on whether the
        destination object's current metageneration matches the given
        value.

        If a list of long is passed in, no match operation will be
        performed.  (Deprecated: type(list of long) is supported for
        backwards-compatibility reasons only.)

    :type if_source_generation_match: list of long
    :param if_source_generation_match:
        (Optional) Makes the operation conditional on whether the current
        generation of each source blob matches the corresponding generation.
        The list must match ``sources`` item-to-item.

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`

    :raises ValueError:
        if a list is passed as ``if_generation_match`` together with
        ``if_source_generation_match``, or if the length of
        ``if_source_generation_match`` differs from that of ``sources``.
    """
    source_count = len(sources)
    client = self._require_client(client)
    query_params = {}

    # Legacy calling convention: a list passed as ``if_generation_match``
    # is reinterpreted as the per-source generation list.
    if isinstance(if_generation_match, list):
        warnings.warn(
            _COMPOSE_IF_GENERATION_LIST_DEPRECATED,
            DeprecationWarning,
            stacklevel=2,
        )
        if if_source_generation_match is not None:
            raise ValueError(
                _COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR
            )
        if_source_generation_match, if_generation_match = if_generation_match, None

    # Legacy calling convention: a list of metagenerations is dropped.
    if isinstance(if_metageneration_match, list):
        warnings.warn(
            _COMPOSE_IF_METAGENERATION_LIST_DEPRECATED,
            DeprecationWarning,
            stacklevel=2,
        )
        if_metageneration_match = None

    if if_source_generation_match is None:
        if_source_generation_match = [None] * source_count
    if len(if_source_generation_match) != source_count:
        raise ValueError(_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR)

    source_objects = []
    for source, expected_generation in zip(sources, if_source_generation_match):
        entry = {"name": source.name, "generation": source.generation}
        # Only attach preconditions for sources that actually have one.
        if expected_generation is not None:
            entry["objectPreconditions"] = {"ifGenerationMatch": expected_generation}
        source_objects.append(entry)

    request = {
        "sourceObjects": source_objects,
        "destination": self._properties.copy(),
    }

    if self.user_project is not None:
        query_params["userProject"] = self.user_project

    _add_generation_match_parameters(
        query_params,
        if_generation_match=if_generation_match,
        if_metageneration_match=if_metageneration_match,
    )

    api_response = client._post_resource(
        f"{self.path}/compose",
        request,
        query_params=query_params,
        timeout=timeout,
        retry=retry,
        _target_object=self,
    )
    self._set_properties(api_response)
def rewrite(
    self,
    source,
    token=None,
    client=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    if_source_generation_match=None,
    if_source_generation_not_match=None,
    if_source_metageneration_match=None,
    if_source_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
):
    """Rewrite the contents of ``source`` into this blob.

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    .. note::

       ``rewrite`` is not supported in a ``Batch`` context.

    :type source: :class:`Blob`
    :param source: blob whose contents will be rewritten into this blob.

    :type token: str
    :param token:
        (Optional) Token returned from an earlier, not-completed call to
        rewrite the same source blob. If passed, result will include
        updated status, total bytes written.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`
        Note that the generation to be matched is that of the
        ``destination`` blob.

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`
        Note that the generation to be matched is that of the
        ``destination`` blob.

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`
        Note that the metageneration to be matched is that of the
        ``destination`` blob.

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`
        Note that the metageneration to be matched is that of the
        ``destination`` blob.

    :type if_source_generation_match: long
    :param if_source_generation_match:
        (Optional) Makes the operation conditional on whether the source
        object's generation matches the given value.

    :type if_source_generation_not_match: long
    :param if_source_generation_not_match:
        (Optional) Makes the operation conditional on whether the source
        object's generation does not match the given value.

    :type if_source_metageneration_match: long
    :param if_source_metageneration_match:
        (Optional) Makes the operation conditional on whether the source
        object's current metageneration matches the given value.

    :type if_source_metageneration_not_match: long
    :param if_source_metageneration_not_match:
        (Optional) Makes the operation conditional on whether the source
        object's current metageneration does not match the given value.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`

    :rtype: tuple
    :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
              is a rewrite token (``None`` if the rewrite is complete),
              ``bytes_rewritten`` is the number of bytes rewritten so far,
              and ``total_bytes`` is the total number of bytes to be
              rewritten.
    """
    client = self._require_client(client)

    # Destination key headers first, then the source-key variants on top.
    headers = _get_encryption_headers(self._encryption_key)
    headers.update(_get_encryption_headers(source._encryption_key, source=True))

    query_params = self._query_params
    # The destination generation is never sent as a plain query parameter.
    query_params.pop("generation", None)

    if token:
        query_params["rewriteToken"] = token

    source_generation = source.generation
    if source_generation:
        query_params["sourceGeneration"] = source_generation

    # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
    # at rest, object resource metadata will store the version of the Key Management
    # Service cryptographic material. If a Blob instance with KMS Key metadata set is
    # used to rewrite the object, then the existing kmsKeyName version
    # value can't be used in the rewrite request and the client instead ignores it.
    kms_key_name = self.kms_key_name
    if kms_key_name is not None and "cryptoKeyVersions" not in kms_key_name:
        query_params["destinationKmsKeyName"] = kms_key_name

    _add_generation_match_parameters(
        query_params,
        if_generation_match=if_generation_match,
        if_generation_not_match=if_generation_not_match,
        if_metageneration_match=if_metageneration_match,
        if_metageneration_not_match=if_metageneration_not_match,
        if_source_generation_match=if_source_generation_match,
        if_source_generation_not_match=if_source_generation_not_match,
        if_source_metageneration_match=if_source_metageneration_match,
        if_source_metageneration_not_match=if_source_metageneration_not_match,
    )

    api_response = client._post_resource(
        f"{source.path}/rewriteTo{self.path}",
        self._properties,
        query_params=query_params,
        headers=headers,
        timeout=timeout,
        retry=retry,
        _target_object=self,
    )
    bytes_rewritten = int(api_response["totalBytesRewritten"])
    total_bytes = int(api_response["objectSize"])

    # An unfinished rewrite carries a continuation token and no resource.
    if not api_response["done"]:
        return api_response["rewriteToken"], bytes_rewritten, total_bytes

    # The resource key is set if and only if the API response is
    # completely done; there is no rewrite token to return in this case.
    self._set_properties(api_response["resource"])
    return None, bytes_rewritten, total_bytes
def update_storage_class(
    self,
    new_class,
    client=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    if_source_generation_match=None,
    if_source_generation_not_match=None,
    if_source_metageneration_match=None,
    if_source_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
):
    """Change the blob's storage class by rewriting the blob onto itself.

    This helper keeps issuing rewrite calls until the rewrite reports
    completion, so it may take some time for large files.

    See
    https://cloud.google.com/storage/docs/per-object-storage-class

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type new_class: str
    :param new_class:
        new storage class for the object.   One of:
        :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
        or
        :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`
        Note that the generation to be matched is that of the
        ``destination`` blob.

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`
        Note that the generation to be matched is that of the
        ``destination`` blob.

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`
        Note that the metageneration to be matched is that of the
        ``destination`` blob.

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`
        Note that the metageneration to be matched is that of the
        ``destination`` blob.

    :type if_source_generation_match: long
    :param if_source_generation_match:
        (Optional) Makes the operation conditional on whether the source
        object's generation matches the given value.

    :type if_source_generation_not_match: long
    :param if_source_generation_not_match:
        (Optional) Makes the operation conditional on whether the source
        object's generation does not match the given value.

    :type if_source_metageneration_match: long
    :param if_source_metageneration_match:
        (Optional) Makes the operation conditional on whether the source
        object's current metageneration matches the given value.

    :type if_source_metageneration_not_match: long
    :param if_source_metageneration_not_match:
        (Optional) Makes the operation conditional on whether the source
        object's current metageneration does not match the given value.

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. See: :ref:`configuring_retries`
    """
    # Record the new storage class locally before the rewrite is issued.
    self._patch_property("storageClass", new_class)

    # The same preconditions and transport options apply to every call.
    shared_kwargs = dict(
        if_generation_match=if_generation_match,
        if_generation_not_match=if_generation_not_match,
        if_metageneration_match=if_metageneration_match,
        if_metageneration_not_match=if_metageneration_not_match,
        if_source_generation_match=if_source_generation_match,
        if_source_generation_not_match=if_source_generation_not_match,
        if_source_metageneration_match=if_source_metageneration_match,
        if_source_metageneration_not_match=if_source_metageneration_not_match,
        timeout=timeout,
        retry=retry,
    )

    # First call starts the rewrite; follow-up calls resume it with the
    # token from the previous response until no token is returned.
    token, _, _ = self.rewrite(self, **shared_kwargs)
    while token is not None:
        token, _, _ = self.rewrite(self, token=token, **shared_kwargs)
def open(
    self,
    mode="r",
    chunk_size=None,
    ignore_flush=None,
    encoding=None,
    errors=None,
    newline=None,
    **kwargs,
):
    r"""Return a file-like object for reading from or writing to this blob.

    The returned object can be used as a context manager, just like the
    result of Python's built-in 'open()' function.

    While reading, as with other read methods, if blob.generation is not set
    the most recent blob generation will be used. Because the file-like IO
    reader downloads progressively in chunks, this could result in data from
    multiple versions being mixed together. If this is a concern, use
    either bucket.get_blob(), or blob.reload(), which will download the
    latest generation number and set it; or, if the generation is known, set
    it manually, for instance with bucket.blob(generation=123456).

    Checksumming (hashing) to verify data integrity is disabled for reads
    using this feature because reads are implemented using request ranges,
    which do not provide checksums to validate. See
    https://cloud.google.com/storage/docs/hashes-etags for details.

    See a [code sample](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_fileio_write_read.py).

    Extra keyword arguments are forwarded to the underlying reader or
    writer. For both uploads and downloads, the following arguments are
    supported:

    - ``if_generation_match``
    - ``if_generation_not_match``
    - ``if_metageneration_match``
    - ``if_metageneration_not_match``
    - ``timeout``
    - ``retry``

    For downloads only, the following additional arguments are supported:

    - ``raw_download``

    For uploads only, the following additional arguments are supported:

    - ``content_type``
    - ``num_retries``
    - ``predefined_acl``
    - ``checksum``

    .. note::

       ``num_retries`` is supported for backwards-compatibility
       reasons only; please use ``retry`` with a Retry object or
       ConditionalRetryPolicy instead.

    :type mode: str
    :param mode:
        (Optional) A mode string, as per standard Python `open()` semantics.
        The first character must be 'r', to open the blob for reading, or
        'w' to open it for writing. The second character, if present, must
        be 't' for (unicode) text mode, or 'b' for bytes mode. If the second
        character is omitted, text mode is the default.

    :type chunk_size: long
    :param chunk_size:
        (Optional) For reads, the minimum number of bytes to read at a time.
        If fewer bytes than the chunk_size are requested, the remainder is
        buffered. For writes, the maximum number of bytes to buffer before
        sending data to the server, and the size of each request when data
        is sent. Writes are implemented as a "resumable upload", so
        chunk_size for writes must be exactly a multiple of 256KiB as with
        other resumable uploads. The default is 40 MiB.

    :type ignore_flush: bool
    :param ignore_flush:
        (Optional) For non text-mode writes, makes flush() do nothing
        instead of raising an error. flush() without closing is not
        supported by the remote service and therefore calling it normally
        results in io.UnsupportedOperation. However, that behavior is
        incompatible with some consumers and wrappers of file objects in
        Python, such as zipfile.ZipFile or io.TextIOWrapper. Setting
        ignore_flush will cause flush() to successfully do nothing, for
        compatibility with those contexts. The correct way to actually flush
        data to the remote server is to close() (using a context manager,
        such as in the example, will cause this to happen automatically).

    :type encoding: str
    :param encoding:
        (Optional) For text mode only, the name of the encoding that the stream will
        be decoded or encoded with. If omitted, it defaults to
        locale.getpreferredencoding(False).

    :type errors: str
    :param errors:
        (Optional) For text mode only, an optional string that specifies how encoding
        and decoding errors are to be handled. Pass 'strict' to raise a
        ValueError exception if there is an encoding error (the default of
        None has the same effect), or pass 'ignore' to ignore errors. (Note
        that ignoring encoding errors can lead to data loss.) Other more
        rarely-used options are also available; see the Python 'io' module
        documentation for 'io.TextIOWrapper' for a complete list.

    :type newline: str
    :param newline:
        (Optional) For text mode only, controls how line endings are handled. It can
        be None, '', '\n', '\r', and '\r\n'. If None, reads use "universal
        newline mode" and writes use the system default. See the Python
        'io' module documentation for 'io.TextIOWrapper' for details.

    :returns: A 'BlobReader' or 'BlobWriter' from
        'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one
        of those classes, depending on the 'mode' argument.

    :raises ValueError:
        if text-only arguments are given in a binary mode, if
        ``ignore_flush`` is truthy in a read mode, or if ``ignore_flush``
        is ``False`` in a text write mode.
    :raises NotImplementedError: if ``mode`` is not a supported mode string.
    """
    # Binary modes: hand back the raw reader/writer, no text layer.
    if mode in ("rb", "wb"):
        if encoding or errors or newline:
            raise ValueError(
                "encoding, errors and newline arguments are for text mode only"
            )
        if mode == "wb":
            return BlobWriter(
                self, chunk_size=chunk_size, ignore_flush=ignore_flush, **kwargs
            )
        if ignore_flush:
            raise ValueError("ignore_flush argument is for non-text write mode only")
        return BlobReader(self, chunk_size=chunk_size, **kwargs)

    # Text modes: build the raw stream, then wrap it in a TextIOWrapper.
    if mode in ("r", "rt"):
        if ignore_flush:
            raise ValueError("ignore_flush argument is for non-text write mode only")
        raw_stream = BlobReader(self, chunk_size=chunk_size, **kwargs)
    elif mode in ("w", "wt"):
        # The writer is always created with ignore_flush=True here, so an
        # explicit False from the caller is rejected.
        if ignore_flush is False:
            raise ValueError(
                "ignore_flush is required for text mode writing and "
                "cannot be set to False"
            )
        raw_stream = BlobWriter(
            self, chunk_size=chunk_size, ignore_flush=True, **kwargs
        )
    else:
        raise NotImplementedError(
            "Supported modes strings are 'r', 'rb', 'rt', 'w', 'wb', and 'wt' only."
        )

    return TextIOWrapper(
        raw_stream,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )
cache_control = _scalar_property("cacheControl")
"""Value of the HTTP 'Cache-Control' header for this object.

See [`RFC 7234`](https://tools.ietf.org/html/rfc7234#section-5.2)
and [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

:rtype: str or ``NoneType``
"""
content_disposition = _scalar_property("contentDisposition")
"""HTTP 'Content-Disposition' header for this object.

See [`RFC 6266`](https://tools.ietf.org/html/rfc6266) and
[`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

:rtype: str or ``NoneType``
"""
content_encoding = _scalar_property("contentEncoding")
"""Value of the HTTP 'Content-Encoding' header for this object.

See [`RFC 7231`](https://tools.ietf.org/html/rfc7231#section-3.1.2.2) and
[`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

:rtype: str or ``NoneType``
"""

content_language = _scalar_property("contentLanguage")
"""Value of the HTTP 'Content-Language' header for this object.

See [`BCP47`](https://tools.ietf.org/html/bcp47) and
[`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

:rtype: str or ``NoneType``
"""

content_type = _scalar_property(_CONTENT_TYPE_FIELD)
"""Value of the HTTP 'Content-Type' header for this object.

See [`RFC 2616`](https://tools.ietf.org/html/rfc2616#section-14.17) and
[`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

:rtype: str or ``NoneType``
"""

crc32c = _scalar_property("crc32c")
"""CRC32C checksum of this object.

To retrieve the value, first use a reload method of the Blob class,
which loads the blob's properties from the server.

See [`RFC 4960`](https://tools.ietf.org/html/rfc4960#appendix-B) and
[`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

If not set before upload, the server will compute the hash.

:rtype: str or ``NoneType``
"""
def _prep_and_do_download(
    self,
    file_obj,
    client=None,
    start=None,
    end=None,
    raw_download=False,
    if_etag_match=None,
    if_etag_not_match=None,
    if_generation_match=None,
    if_generation_not_match=None,
    if_metageneration_match=None,
    if_metageneration_not_match=None,
    timeout=_DEFAULT_TIMEOUT,
    checksum="md5",
    retry=DEFAULT_RETRY,
    command=None,
):
    """Prepare the URL, headers and retry policy, then download the blob
    into a file-like object.

    See https://cloud.google.com/storage/docs/downloading-objects

    If :attr:`user_project` is set on the bucket, bills the API request
    to that project.

    :type file_obj: file
    :param file_obj: A file handle to which to write the blob's data.

    :type client: :class:`~google.cloud.storage.client.Client`
    :param client:
        (Optional) The client to use. If not passed, falls back to the
        ``client`` stored on the blob's bucket.

    :type start: int
    :param start: (Optional) The first byte in a range to be downloaded.

    :type end: int
    :param end: (Optional) The last byte in a range to be downloaded.

    :type raw_download: bool
    :param raw_download:
        (Optional) If true, download the object without any expansion.

    :type if_etag_match: Union[str, Set[str]]
    :param if_etag_match:
        (Optional) See :ref:`using-if-etag-match`

    :type if_etag_not_match: Union[str, Set[str]]
    :param if_etag_not_match:
        (Optional) See :ref:`using-if-etag-not-match`

    :type if_generation_match: long
    :param if_generation_match:
        (Optional) See :ref:`using-if-generation-match`

    :type if_generation_not_match: long
    :param if_generation_not_match:
        (Optional) See :ref:`using-if-generation-not-match`

    :type if_metageneration_match: long
    :param if_metageneration_match:
        (Optional) See :ref:`using-if-metageneration-match`

    :type if_metageneration_not_match: long
    :param if_metageneration_not_match:
        (Optional) See :ref:`using-if-metageneration-not-match`

    :type timeout: float or tuple
    :param timeout:
        (Optional) The amount of time, in seconds, to wait
        for the server response.  See: :ref:`configuring_timeouts`

    :type checksum: str
    :param checksum:
        (Optional) The type of checksum to compute to verify the integrity
        of the object. The response headers must contain a checksum of the
        requested type. If the headers lack an appropriate checksum (for
        instance in the case of transcoded or ranged downloads where the
        remote service does not know the correct checksum, including
        downloads where chunk_size is set) an INFO-level log will be
        emitted. Supported values are "md5", "crc32c" and None. The default
        is "md5".

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry: (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_metageneration_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :type command: str
    :param command:
        (Optional) Information about which interface for download was used,
        to be included in the X-Goog-API-Client header. Please leave as None
        unless otherwise directed.
    """
    # Conditional retries are designed for non-media calls, which change
    # arguments into query_params dictionaries. Media operations work
    # differently, so a "fake" query_params is built here purely to let the
    # ConditionalRetryPolicy decide which retry policy applies.
    if isinstance(retry, ConditionalRetryPolicy):
        retry = retry.get_retry_policy_if_conditions_met(
            query_params={
                "ifGenerationMatch": if_generation_match,
                "ifMetagenerationMatch": if_metageneration_match,
            }
        )

    client = self._require_client(client)

    download_url = self._get_download_url(
        client,
        if_generation_match=if_generation_match,
        if_generation_not_match=if_generation_not_match,
        if_metageneration_match=if_metageneration_match,
        if_metageneration_not_match=if_metageneration_not_match,
    )

    request_headers = _get_encryption_headers(self._encryption_key)
    request_headers["accept-encoding"] = "gzip"
    _add_etag_match_headers(
        request_headers,
        if_etag_match=if_etag_match,
        if_etag_not_match=if_etag_not_match,
    )
    # Defaults go in first so the request-specific headers override them.
    default_headers = _get_default_headers(
        client._connection.user_agent, command=command
    )
    request_headers = {**default_headers, **request_headers}

    http_transport = client._http

    try:
        self._do_download(
            http_transport,
            file_obj,
            download_url,
            request_headers,
            start,
            end,
            raw_download,
            timeout=timeout,
            checksum=checksum,
            retry=retry,
        )
    except resumable_media.InvalidResponse as exc:
        _raise_from_invalid_response(exc)
@property
def component_count(self):
    """Number of underlying components that make up this object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: int or ``NoneType``
    :returns: The component count (in case of a composed object) or
              ``None`` if the blob's resource has not been loaded from
              the server.  This property will not be set on objects
              not created via ``compose``.
    """
    raw_count = self._properties.get("componentCount")
    # The stored value is converted with int() before being returned.
    return None if raw_count is None else int(raw_count)
@property
def etag(self):
    """ETag of the object.

    See [`RFC 2616 (etags)`](https://tools.ietf.org/html/rfc2616#section-3.11) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    :returns: The blob etag or ``None`` if the blob's resource has not
              been loaded from the server.
    """
    properties = self._properties
    return properties.get("etag")
event_based_hold = _scalar_property("eventBasedHold")
"""Whether an event-based hold is active on the object.

See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

If the property is not set locally, returns :data:`None`.

:rtype: bool or ``NoneType``
"""
@property
def generation(self):
    """Generation of the object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: int or ``NoneType``
    :returns: The generation of the blob or ``None`` if the blob's
              resource has not been loaded from the server.
    """
    raw_generation = self._properties.get("generation")
    # The stored value is converted with int() before being returned.
    return None if raw_generation is None else int(raw_generation)
@property
def id(self):
    """ID of the object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    The ID consists of the bucket name, object name, and generation number.

    :rtype: str or ``NoneType``
    :returns: The ID of the blob or ``None`` if the blob's
              resource has not been loaded from the server.
    """
    properties = self._properties
    return properties.get("id")
md5_hash = _scalar_property("md5Hash")
"""MD5 hash of this object.

To retrieve the value, first use a reload method of the Blob class,
which loads the blob's properties from the server.

See [`RFC 1321`](https://tools.ietf.org/html/rfc1321) and
[`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

If not set before upload, the server will compute the hash.

:rtype: str or ``NoneType``
"""
@property
def media_link(self):
    """Media download URI of the object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: str or ``NoneType``
    :returns: The media link for the blob or ``None`` if the blob's
              resource has not been loaded from the server.
    """
    properties = self._properties
    return properties.get("mediaLink")
@property
def metadata(self):
    """Arbitrary/application specific metadata for the object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :setter: Update arbitrary/application specific metadata for the
             object.
    :getter: Retrieve arbitrary/application specific metadata for
             the object.

    :rtype: dict or ``NoneType``
    :returns: The metadata associated with the blob or ``None`` if the
              property is not set.
    """
    stored = self._properties.get("metadata")
    # Return a deep copy so callers cannot mutate the stored mapping.
    return copy.deepcopy(stored)


@metadata.setter
def metadata(self, value):
    """Update arbitrary/application specific metadata for the object.

    Values are stored to GCS as strings. To delete a key, set its value to
    None and call blob.patch().

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :type value: dict
    :param value: The blob metadata to set.
    """
    if value is None:
        normalized = None
    else:
        # Stringify every value; ``None`` values are kept as ``None``.
        normalized = {
            key: (None if item is None else str(item))
            for key, item in value.items()
        }
    self._patch_property("metadata", normalized)
@property
def metageneration(self):
    """Metageneration of the object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: int or ``NoneType``
    :returns: The stored ``"metageneration"`` value converted to ``int``,
              or ``None`` when it is absent (for example, the resource has
              not been loaded from the server).
    """
    raw = self._properties.get("metageneration")
    if raw is None:
        return None
    return int(raw)
@property
def owner(self):
    """Info about the owner of the object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: dict or ``NoneType``
    :returns: A deep copy of the mapping of owner's role/ID, or ``None``
              when no ``"owner"`` entry is present (for example, the
              resource has not been loaded from the server).
    """
    owner_info = self._properties.get("owner")
    return copy.deepcopy(owner_info)
@property
def retention_expiration_time(self):
    """Timestamp at which the object's retention period expires.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: :class:`datetime.datetime` or ``NoneType``
    :returns: The stored ``"retentionExpirationTime"`` RFC3339 string
              parsed by ``_rfc3339_nanos_to_datetime``, or ``None`` if the
              property is not set locally.
    """
    raw = self._properties.get("retentionExpirationTime")
    if raw is None:
        return None
    return _rfc3339_nanos_to_datetime(raw)
@property
def self_link(self):
    """URI for the object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: str or ``NoneType``
    :returns: The value stored under ``"selfLink"`` in the blob's
              properties, or ``None`` when that entry is absent (for
              example, the resource has not been loaded from the server).
    """
    link = self._properties.get("selfLink")
    return link
@property
def size(self):
    """Size of the object, in bytes.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: int or ``NoneType``
    :returns: The stored ``"size"`` value converted to ``int``, or
              ``None`` when it is absent (for example, the resource has
              not been loaded from the server).
    """
    raw = self._properties.get("size")
    return None if raw is None else int(raw)
@property
def kms_key_name(self):
    """Resource name of the Cloud KMS key used to encrypt the blob's contents.

    :rtype: str or ``NoneType``
    :returns:
        The value stored under ``"kmsKeyName"``, or ``None`` if no Cloud
        KMS key was used, or the blob's resource has not been loaded from
        the server.
    """
    key_name = self._properties.get("kmsKeyName")
    return key_name

@kms_key_name.setter
def kms_key_name(self, value):
    """Set the KMS encryption key for the object.

    The value is recorded through ``_patch_property`` under ``"kmsKeyName"``.

    :type value: str or ``NoneType``
    :param value: new KMS key name (None to clear any existing key).
    """
    self._patch_property("kmsKeyName", value)
# Class attribute produced by ``_scalar_property`` for the "storageClass" field.
# NOTE(review): ``_scalar_property`` is defined outside this excerpt;
# presumably it builds a property over the blob's ``_properties`` mapping --
# confirm against its definition.
storage_class = _scalar_property("storageClass")
"""Retrieve the storage class for the object.

This can only be set at blob / object **creation** time. If you'd
like to change the storage class **after** the blob / object already
exists in a bucket, call :meth:`update_storage_class` (which uses
:meth:`rewrite`).

See https://cloud.google.com/storage/docs/storage-classes

:rtype: str or ``NoneType``
:returns:
    If set, one of
    :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
    :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
    :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
    :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
    :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
    :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`,
    :attr:`~google.cloud.storage.constants.DURABLE_REDUCED_AVAILABILITY_STORAGE_CLASS`,
    else ``None``.
"""
# Class attribute produced by ``_scalar_property`` for the "temporaryHold" field.
# NOTE(review): ``_scalar_property`` is defined outside this excerpt;
# presumably it builds a property over the blob's ``_properties`` mapping --
# confirm against its definition.
temporary_hold = _scalar_property("temporaryHold")
"""Is a temporary hold active on the object?

See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

If the property is not set locally, returns :data:`None`.

:rtype: bool or ``NoneType``
"""
@property
def time_deleted(self):
    """Timestamp at which the object was deleted.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: :class:`datetime.datetime` or ``NoneType``
    :returns: The stored ``"timeDeleted"`` RFC3339 string parsed by
              ``_rfc3339_nanos_to_datetime``, or ``None`` if the blob's
              resource has not been loaded from the server (see
              :meth:`reload`). If the blob has not been deleted, this
              will never be set.
    """
    raw = self._properties.get("timeDeleted")
    if raw is None:
        return None
    return _rfc3339_nanos_to_datetime(raw)
@property
def time_created(self):
    """Timestamp at which the object was created.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: :class:`datetime.datetime` or ``NoneType``
    :returns: The stored ``"timeCreated"`` RFC3339 string parsed by
              ``_rfc3339_nanos_to_datetime``, or ``None`` if the blob's
              resource has not been loaded from the server (see
              :meth:`reload`).
    """
    raw = self._properties.get("timeCreated")
    return None if raw is None else _rfc3339_nanos_to_datetime(raw)
@property
def updated(self):
    """Timestamp at which the object was updated.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: :class:`datetime.datetime` or ``NoneType``
    :returns: The stored ``"updated"`` RFC3339 string parsed by
              ``_rfc3339_nanos_to_datetime``, or ``None`` if the blob's
              resource has not been loaded from the server (see
              :meth:`reload`).
    """
    raw = self._properties.get("updated")
    if raw is None:
        return None
    return _rfc3339_nanos_to_datetime(raw)
@property
def custom_time(self):
    """Custom time for the object.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :rtype: :class:`datetime.datetime` or ``NoneType``
    :returns: The stored ``"customTime"`` RFC3339 string parsed by
              ``_rfc3339_nanos_to_datetime``, or ``None`` if the blob's
              resource has not been loaded from the server (see
              :meth:`reload`).
    """
    raw = self._properties.get("customTime")
    return None if raw is None else _rfc3339_nanos_to_datetime(raw)

@custom_time.setter
def custom_time(self, value):
    """Set the custom time for the object.

    Once set on the server side object, this value can't be unset, but may
    only be changed to a custom datetime in the future.

    If :attr:`custom_time` must be unset, either perform a rewrite
    operation or upload the data again.

    See https://cloud.google.com/storage/docs/json_api/v1/objects

    :type value: :class:`datetime.datetime`
    :param value: new value. A non-``None`` value is serialized with
                  ``_datetime_to_rfc3339`` before being recorded; ``None``
                  is recorded as-is.
    """
    serialized = value if value is None else _datetime_to_rfc3339(value)
    self._patch_property("customTime", serialized)
4667def _get_host_name(connection):
4668 """Returns the host name from the given connection.
4670 :type connection: :class:`~google.cloud.storage._http.Connection`
4671 :param connection: The connection object.
4673 :rtype: str
4674 :returns: The host name.
4675 """
4676 # TODO: After google-cloud-core 1.6.0 is stable and we upgrade it
4677 # to 1.6.0 in setup.py, we no longer need to check the attribute
4678 # existence. We can simply return connection.get_api_base_url_for_mtls().
4679 return (
4680 connection.API_BASE_URL
4681 if not hasattr(connection, "get_api_base_url_for_mtls")
4682 else connection.get_api_base_url_for_mtls()
4683 )
4686def _get_encryption_headers(key, source=False):
4687 """Builds customer encryption key headers
4689 :type key: bytes
4690 :param key: 32 byte key to build request key and hash.
4692 :type source: bool
4693 :param source: If true, return headers for the "source" blob; otherwise,
4694 return headers for the "destination" blob.
4696 :rtype: dict
4697 :returns: dict of HTTP headers being sent in request.
4698 """
4699 if key is None:
4700 return {}
4702 key = _to_bytes(key)
4703 key_hash = hashlib.sha256(key).digest()
4704 key_hash = base64.b64encode(key_hash)
4705 key = base64.b64encode(key)
4707 if source:
4708 prefix = "X-Goog-Copy-Source-Encryption-"
4709 else:
4710 prefix = "X-Goog-Encryption-"
4712 return {
4713 prefix + "Algorithm": "AES256",
4714 prefix + "Key": _bytes_to_unicode(key),
4715 prefix + "Key-Sha256": _bytes_to_unicode(key_hash),
4716 }
def _quote(value, safe=b"~"):
    """URL-quote a string.

    The value is first converted to UTF-8 bytes with ``_to_bytes`` and the
    resulting bytes are then passed to ``urllib.parse.quote``.

    :type value: str or bytes
    :param value: The value to be URL-quoted.

    :type safe: bytes
    :param safe: Bytes *not* to be quoted. By default, includes only ``b'~'``.

    :rtype: str
    :returns: The quoted value, as returned by ``urllib.parse.quote``.
    """
    encoded = _to_bytes(value, encoding="utf-8")
    return quote(encoded, safe=safe)
4740def _maybe_rewind(stream, rewind=False):
4741 """Rewind the stream if desired.
4743 :type stream: IO[bytes]
4744 :param stream: A bytes IO object open for reading.
4746 :type rewind: bool
4747 :param rewind: Indicates if we should seek to the beginning of the stream.
4748 """
4749 if rewind:
4750 stream.seek(0, os.SEEK_SET)
def _raise_from_invalid_response(error):
    """Re-wrap and raise an ``InvalidResponse`` exception.

    The raised message has the form ``"<method> <url>: <details>"``, where
    the details are the response body followed by the original error text
    (or only the error text when the body is empty).

    :type error: :exc:`google.resumable_media.InvalidResponse`
    :param error: A caught exception from the ``google-resumable-media``
                  library.

    :raises: :class:`~google.cloud.exceptions.GoogleCloudError` corresponding
             to the failed status code
    """
    response = error.response
    request = response.request

    # The response body gives the actual reason of the error, while
    # ``str(error)`` gives the message about the expected status code.
    body = response.text
    details = body + ": " + str(error) if body else str(error)

    message = f"{request.method} {request.url}: {details}"

    raise exceptions.from_http_status(response.status_code, message, response=response)
4777def _add_query_parameters(base_url, name_value_pairs):
4778 """Add one query parameter to a base URL.
4780 :type base_url: string
4781 :param base_url: Base URL (may already contain query parameters)
4783 :type name_value_pairs: list of (string, string) tuples.
4784 :param name_value_pairs: Names and values of the query parameters to add
4786 :rtype: string
4787 :returns: URL with additional query strings appended.
4788 """
4789 if len(name_value_pairs) == 0:
4790 return base_url
4792 scheme, netloc, path, query, frag = urlsplit(base_url)
4793 query = parse_qsl(query)
4794 query.extend(name_value_pairs)
4795 return urlunsplit((scheme, netloc, path, urlencode(query), frag))