1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15# pylint: disable=too-many-lines
16
17"""Create / interact with Google Cloud Storage blobs.
18"""
19
20import base64
21import copy
22import hashlib
23from io import BytesIO
24from io import TextIOWrapper
25import logging
26import mimetypes
27import os
28import re
29from email.parser import HeaderParser
30from urllib.parse import parse_qsl
31from urllib.parse import quote
32from urllib.parse import urlencode
33from urllib.parse import urlsplit
34from urllib.parse import urlunsplit
35import warnings
36
37from google.cloud.storage._media.requests import ChunkedDownload
38from google.cloud.storage._media.requests import Download
39from google.cloud.storage._media.requests import RawDownload
40from google.cloud.storage._media.requests import RawChunkedDownload
41from google.cloud.storage._media.requests import MultipartUpload
42from google.cloud.storage._media.requests import ResumableUpload
43
44from google.api_core.iam import Policy
45from google.cloud import exceptions
46from google.cloud._helpers import _bytes_to_unicode
47from google.cloud._helpers import _datetime_to_rfc3339
48from google.cloud._helpers import _rfc3339_nanos_to_datetime
49from google.cloud._helpers import _to_bytes
50from google.cloud.exceptions import NotFound
51from google.cloud.storage._helpers import _add_etag_match_headers
52from google.cloud.storage._helpers import _add_generation_match_parameters
53from google.cloud.storage._helpers import _PropertyMixin
54from google.cloud.storage._helpers import _scalar_property
55from google.cloud.storage._helpers import _bucket_bound_hostname_url
56from google.cloud.storage._helpers import _raise_if_more_than_one_set
57from google.cloud.storage._helpers import _get_default_headers
58from google.cloud.storage._helpers import _get_default_storage_base_url
59from google.cloud.storage._signing import generate_signed_url_v2
60from google.cloud.storage._signing import generate_signed_url_v4
61from google.cloud.storage._helpers import _API_VERSION
62from google.cloud.storage._helpers import _virtual_hosted_style_base_url
63from google.cloud.storage._opentelemetry_tracing import create_trace_span
64from google.cloud.storage.acl import ACL
65from google.cloud.storage.acl import ObjectACL
66from google.cloud.storage.constants import _DEFAULT_TIMEOUT
67from google.cloud.storage.constants import ARCHIVE_STORAGE_CLASS
68from google.cloud.storage.constants import COLDLINE_STORAGE_CLASS
69from google.cloud.storage.constants import MULTI_REGIONAL_LEGACY_STORAGE_CLASS
70from google.cloud.storage.constants import NEARLINE_STORAGE_CLASS
71from google.cloud.storage.constants import REGIONAL_LEGACY_STORAGE_CLASS
72from google.cloud.storage.constants import STANDARD_STORAGE_CLASS
73from google.cloud.storage.exceptions import DataCorruption
74from google.cloud.storage.exceptions import InvalidResponse
75from google.cloud.storage.retry import ConditionalRetryPolicy
76from google.cloud.storage.retry import DEFAULT_RETRY
77from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON
78from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED
79from google.cloud.storage.fileio import BlobReader
80from google.cloud.storage.fileio import BlobWriter
81
82
83_DEFAULT_CONTENT_TYPE = "application/octet-stream"
84_DOWNLOAD_URL_TEMPLATE = "{hostname}/download/storage/{api_version}{path}?alt=media"
85_BASE_UPLOAD_TEMPLATE = (
86 "{hostname}/upload/storage/{api_version}{bucket_path}/o?uploadType="
87)
88_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
89_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"
90# NOTE: "acl" is also writeable but we defer ACL management to
91# the classes in the google.cloud.storage.acl module.
92_CONTENT_TYPE_FIELD = "contentType"
93_WRITABLE_FIELDS = (
94 "cacheControl",
95 "contentDisposition",
96 "contentEncoding",
97 "contentLanguage",
98 _CONTENT_TYPE_FIELD,
99 "crc32c",
100 "customTime",
101 "md5Hash",
102 "metadata",
103 "name",
104 "retention",
105 "storageClass",
106)
107_READ_LESS_THAN_SIZE = (
108 "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining."
109)
110_CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE = (
111 "A checksum of type `{}` was requested, but checksumming is not available "
112 "for downloads when chunk_size is set."
113)
114_COMPOSE_IF_GENERATION_LIST_DEPRECATED = (
115 "'if_generation_match: type list' is deprecated and supported for "
116 "backwards-compatability reasons only. Use 'if_source_generation_match' "
117 "instead' to match source objects' generations."
118)
119_COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR = (
120 "Use 'if_generation_match' to match the generation of the destination "
121 "object by passing in a generation number, instead of a list. "
122 "Use 'if_source_generation_match' to match source objects generations."
123)
124_COMPOSE_IF_METAGENERATION_LIST_DEPRECATED = (
125 "'if_metageneration_match: type list' is deprecated and supported for "
126 "backwards-compatability reasons only. Note that the metageneration to "
127 "be matched is that of the destination blob. Please pass in a single "
128 "value (type long)."
129)
130_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR = (
131 "'if_source_generation_match' length must be the same as 'sources' length"
132)
133_DOWNLOAD_AS_STRING_DEPRECATED = (
134 "Blob.download_as_string() is deprecated and will be removed in future. "
135 "Use Blob.download_as_bytes() instead."
136)
137_FROM_STRING_DEPRECATED = (
138 "Blob.from_string() is deprecated and will be removed in future. "
139 "Use Blob.from_uri() instead."
140)
141_GS_URL_REGEX_PATTERN = re.compile(
142 r"(?P<scheme>gs)://(?P<bucket_name>[a-z0-9_.-]+)/(?P<object_name>.+)"
143)
144
145_DEFAULT_CHUNKSIZE = 104857600 # 1024 * 1024 B * 100 = 100 MB
146_MAX_MULTIPART_SIZE = 8388608 # 8 MB
147
148_logger = logging.getLogger(__name__)
149
150
151class Blob(_PropertyMixin):
152 """A wrapper around Cloud Storage's concept of an ``Object``.
153
154 :type name: str
155 :param name: The name of the blob. This corresponds to the unique path of
156 the object in the bucket. If bytes, will be converted to a
157 unicode object. Blob / object names can contain any sequence
158 of valid unicode characters, of length 1-1024 bytes when
159 UTF-8 encoded.
160
161 :type bucket: :class:`google.cloud.storage.bucket.Bucket`
162 :param bucket: The bucket to which this blob belongs.
163
164 :type chunk_size: int
165 :param chunk_size:
166 (Optional) The size of a chunk of data whenever iterating (in bytes).
167 This must be a multiple of 256 KB per the API specification. If not
168 specified, the chunk_size of the blob itself is used. If that is not
169 specified, a default value of 40 MB is used.
170
171 :type encryption_key: bytes
172 :param encryption_key:
173 (Optional) 32 byte encryption key for customer-supplied encryption.
174 See https://cloud.google.com/storage/docs/encryption#customer-supplied.
175
176 :type kms_key_name: str
177 :param kms_key_name:
178 (Optional) Resource name of Cloud KMS key used to encrypt the blob's
179 contents.
180
181 :type generation: long
182 :param generation:
183 (Optional) If present, selects a specific revision of this object.
184 """
185
186 _chunk_size = None # Default value for each instance.
187 _CHUNK_SIZE_MULTIPLE = 256 * 1024
188 """Number (256 KB, in bytes) that must divide the chunk size."""
189
190 STORAGE_CLASSES = (
191 STANDARD_STORAGE_CLASS,
192 NEARLINE_STORAGE_CLASS,
193 COLDLINE_STORAGE_CLASS,
194 ARCHIVE_STORAGE_CLASS,
195 MULTI_REGIONAL_LEGACY_STORAGE_CLASS,
196 REGIONAL_LEGACY_STORAGE_CLASS,
197 )
198 """Allowed values for :attr:`storage_class`.
199
200 See
201 https://cloud.google.com/storage/docs/json_api/v1/objects#storageClass
202 https://cloud.google.com/storage/docs/per-object-storage-class
203
204 .. note::
205 This list does not include 'DURABLE_REDUCED_AVAILABILITY', which
206 is only documented for buckets (and deprecated).
207 """
208
209 def __init__(
210 self,
211 name,
212 bucket,
213 chunk_size=None,
214 encryption_key=None,
215 kms_key_name=None,
216 generation=None,
217 ):
218 """
219 property :attr:`name`
220 Get the blob's name.
221 """
222 name = _bytes_to_unicode(name)
223 super(Blob, self).__init__(name=name)
224
225 self.chunk_size = chunk_size # Check that setter accepts value.
226 self._bucket = bucket
227 self._acl = ObjectACL(self)
228 _raise_if_more_than_one_set(
229 encryption_key=encryption_key, kms_key_name=kms_key_name
230 )
231
232 self._encryption_key = encryption_key
233
234 if kms_key_name is not None:
235 self._properties["kmsKeyName"] = kms_key_name
236
237 if generation is not None:
238 self._properties["generation"] = generation
239
240 @property
241 def bucket(self):
242 """Bucket which contains the object.
243
244 :rtype: :class:`~google.cloud.storage.bucket.Bucket`
245 :returns: The object's bucket.
246 """
247 return self._bucket
248
249 @property
250 def chunk_size(self):
251 """Get the blob's default chunk size.
252
253 :rtype: int or ``NoneType``
254 :returns: The current blob's chunk size, if it is set.
255 """
256 return self._chunk_size
257
258 @chunk_size.setter
259 def chunk_size(self, value):
260 """Set the blob's default chunk size.
261
262 :type value: int
263 :param value: (Optional) The current blob's chunk size, if it is set.
264
265 :raises: :class:`ValueError` if ``value`` is not ``None`` and is not a
266 multiple of 256 KB.
267 """
268 if value is not None and value > 0 and value % self._CHUNK_SIZE_MULTIPLE != 0:
269 raise ValueError(
270 "Chunk size must be a multiple of %d." % (self._CHUNK_SIZE_MULTIPLE,)
271 )
272 self._chunk_size = value
273
274 @property
275 def encryption_key(self):
276 """Retrieve the customer-supplied encryption key for the object.
277
278 :rtype: bytes or ``NoneType``
279 :returns:
280 The encryption key or ``None`` if no customer-supplied encryption key was used,
281 or the blob's resource has not been loaded from the server.
282 """
283 return self._encryption_key
284
285 @encryption_key.setter
286 def encryption_key(self, value):
287 """Set the blob's encryption key.
288
289 See https://cloud.google.com/storage/docs/encryption#customer-supplied
290
291 To perform a key rotation for an encrypted blob, use :meth:`rewrite`.
292 See https://cloud.google.com/storage/docs/encryption/using-customer-supplied-keys?hl=ca#rotating
293
294 :type value: bytes
295 :param value: 32 byte encryption key for customer-supplied encryption.
296 """
297 self._encryption_key = value
298
299 @staticmethod
300 def path_helper(bucket_path, blob_name):
301 """Relative URL path for a blob.
302
303 :type bucket_path: str
304 :param bucket_path: The URL path for a bucket.
305
306 :type blob_name: str
307 :param blob_name: The name of the blob.
308
309 :rtype: str
310 :returns: The relative URL path for ``blob_name``.
311 """
312 return bucket_path + "/o/" + _quote(blob_name)
313
314 @property
315 def acl(self):
316 """Create our ACL on demand."""
317 return self._acl
318
319 def __repr__(self):
320 if self.bucket:
321 bucket_name = self.bucket.name
322 else:
323 bucket_name = None
324
325 return f"<Blob: {bucket_name}, {self.name}, {self.generation}>"
326
327 @property
328 def path(self):
329 """Getter property for the URL path to this Blob.
330
331 :rtype: str
332 :returns: The URL path to this Blob.
333 """
334 if not self.name:
335 raise ValueError("Cannot determine path without a blob name.")
336
337 return self.path_helper(self.bucket.path, self.name)
338
339 @property
340 def client(self):
341 """The client bound to this blob."""
342 return self.bucket.client
343
344 @property
345 def user_project(self):
346 """Project ID billed for API requests made via this blob.
347
348 Derived from bucket's value.
349
350 :rtype: str
351 """
352 return self.bucket.user_project
353
354 def _encryption_headers(self):
355 """Return any encryption headers needed to fetch the object.
356
357 :rtype: List(Tuple(str, str))
358 :returns: a list of tuples to be passed as headers.
359 """
360 return _get_encryption_headers(self._encryption_key)
361
362 @property
363 def _query_params(self):
364 """Default query parameters."""
365 params = {}
366 if self.generation is not None:
367 params["generation"] = self.generation
368 if self.user_project is not None:
369 params["userProject"] = self.user_project
370 return params
371
372 @property
373 def public_url(self):
374 """The public URL for this blob.
375
376 Use :meth:`make_public` to enable anonymous access via the returned
377 URL.
378
379 :rtype: `string`
380 :returns: The public URL for this blob.
381 """
382 if self.client:
383 endpoint = self.client.api_endpoint
384 else:
385 endpoint = _get_default_storage_base_url()
386 return "{storage_base_url}/{bucket_name}/{quoted_name}".format(
387 storage_base_url=endpoint,
388 bucket_name=self.bucket.name,
389 quoted_name=_quote(self.name, safe=b"/~"),
390 )
391
392 @classmethod
393 def from_uri(cls, uri, client=None):
394 """Get a constructor for blob object by URI.
395
396 .. code-block:: python
397
398 from google.cloud import storage
399 from google.cloud.storage.blob import Blob
400 client = storage.Client()
401 blob = Blob.from_uri("gs://bucket/object", client=client)
402
403 :type uri: str
404 :param uri: The blob uri following a gs://bucket/object pattern.
405 Both a bucket and object name is required to construct a blob object.
406
407 :type client: :class:`~google.cloud.storage.client.Client`
408 :param client:
409 (Optional) The client to use. Application code should
410 *always* pass ``client``.
411
412 :rtype: :class:`google.cloud.storage.blob.Blob`
413 :returns: The blob object created.
414 """
415 from google.cloud.storage.bucket import Bucket
416
417 match = _GS_URL_REGEX_PATTERN.match(uri)
418 if not match:
419 raise ValueError("URI pattern must be gs://bucket/object")
420 bucket = Bucket(client, name=match.group("bucket_name"))
421 return cls(match.group("object_name"), bucket)
422
423 @classmethod
424 def from_string(cls, uri, client=None):
425 """(Deprecated) Get a constructor for blob object by URI.
426
427 .. note::
428 Deprecated alias for :meth:`from_uri`.
429
430 .. code-block:: python
431
432 from google.cloud import storage
433 from google.cloud.storage.blob import Blob
434 client = storage.Client()
435 blob = Blob.from_string("gs://bucket/object", client=client)
436
437 :type uri: str
438 :param uri: The blob uri following a gs://bucket/object pattern.
439 Both a bucket and object name is required to construct a blob object.
440
441 :type client: :class:`~google.cloud.storage.client.Client`
442 :param client:
443 (Optional) The client to use. Application code should
444 *always* pass ``client``.
445
446 :rtype: :class:`google.cloud.storage.blob.Blob`
447 :returns: The blob object created.
448 """
449 warnings.warn(_FROM_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2)
450 return Blob.from_uri(uri=uri, client=client)
451
452 def generate_signed_url(
453 self,
454 expiration=None,
455 api_access_endpoint=None,
456 method="GET",
457 content_md5=None,
458 content_type=None,
459 response_disposition=None,
460 response_type=None,
461 generation=None,
462 headers=None,
463 query_parameters=None,
464 client=None,
465 credentials=None,
466 version=None,
467 service_account_email=None,
468 access_token=None,
469 virtual_hosted_style=False,
470 bucket_bound_hostname=None,
471 scheme="http",
472 ):
473 """Generates a signed URL for this blob.
474
475 .. note::
476
477 If you are on Google Compute Engine, you can't generate a signed
478 URL using GCE service account.
479 If you'd like to be able to generate a signed URL from GCE,
480 you can use a standard service account from a JSON file rather
481 than a GCE service account.
482
483 If you have a blob that you want to allow access to for a set
484 amount of time, you can use this method to generate a URL that
485 is only valid within a certain time period.
486
487 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-generate-signed-url-v4#storage_generate_signed_url_v4-python).
488
489 This is particularly useful if you don't want publicly
490 accessible blobs, but don't want to require users to explicitly
491 log in.
492
493 If ``bucket_bound_hostname`` is set as an argument of :attr:`api_access_endpoint`,
494 ``https`` works only if using a ``CDN``.
495
496 :type expiration: Union[Integer, datetime.datetime, datetime.timedelta]
497 :param expiration:
498 Point in time when the signed URL should expire. If a ``datetime``
499 instance is passed without an explicit ``tzinfo`` set, it will be
500 assumed to be ``UTC``.
501
502 :type api_access_endpoint: str
503 :param api_access_endpoint: (Optional) URI base, for instance
504 "https://storage.googleapis.com". If not specified, the client's
505 api_endpoint will be used. Incompatible with bucket_bound_hostname.
506
507 :type method: str
508 :param method: The HTTP verb that will be used when requesting the URL.
509
510 :type content_md5: str
511 :param content_md5:
512 (Optional) The MD5 hash of the object referenced by ``resource``.
513
514 :type content_type: str
515 :param content_type:
516 (Optional) The content type of the object referenced by
517 ``resource``.
518
519 :type response_disposition: str
520 :param response_disposition:
521 (Optional) Content disposition of responses to requests for the
522 signed URL. For example, to enable the signed URL to initiate a
523 file of ``blog.png``, use the value ``'attachment;
524 filename=blob.png'``.
525
526 :type response_type: str
527 :param response_type:
528 (Optional) Content type of responses to requests for the signed
529 URL. Ignored if content_type is set on object/blob metadata.
530
531 :type generation: str
532 :param generation:
533 (Optional) A value that indicates which generation of the resource
534 to fetch.
535
536 :type headers: dict
537 :param headers:
538 (Optional) Additional HTTP headers to be included as part of the
539 signed URLs. See:
540 https://cloud.google.com/storage/docs/xml-api/reference-headers
541 Requests using the signed URL *must* pass the specified header
542 (name and value) with each request for the URL.
543
544 :type query_parameters: dict
545 :param query_parameters:
546 (Optional) Additional query parameters to be included as part of the
547 signed URLs. See:
548 https://cloud.google.com/storage/docs/xml-api/reference-headers#query
549
550 :type client: :class:`~google.cloud.storage.client.Client`
551 :param client:
552 (Optional) The client to use. If not passed, falls back to the
553 ``client`` stored on the blob's bucket.
554
555 :type credentials: :class:`google.auth.credentials.Credentials`
556 :param credentials:
557 (Optional) The authorization credentials to attach to requests.
558 These credentials identify this application to the service. If
559 none are specified, the client will attempt to ascertain the
560 credentials from the environment.
561
562 :type version: str
563 :param version:
564 (Optional) The version of signed credential to create. Must be one
565 of 'v2' | 'v4'.
566
567 :type service_account_email: str
568 :param service_account_email:
569 (Optional) E-mail address of the service account.
570
571 :type access_token: str
572 :param access_token: (Optional) Access token for a service account.
573
574 :type virtual_hosted_style: bool
575 :param virtual_hosted_style:
576 (Optional) If true, then construct the URL relative the bucket's
577 virtual hostname, e.g., '<bucket-name>.storage.googleapis.com'.
578 Incompatible with bucket_bound_hostname.
579
580 :type bucket_bound_hostname: str
581 :param bucket_bound_hostname:
582 (Optional) If passed, then construct the URL relative to the bucket-bound hostname.
583 Value can be a bare or with scheme, e.g., 'example.com' or 'http://example.com'.
584 Incompatible with api_access_endpoint and virtual_hosted_style.
585 See: https://cloud.google.com/storage/docs/request-endpoints#cname
586
587 :type scheme: str
588 :param scheme:
589 (Optional) If ``bucket_bound_hostname`` is passed as a bare
590 hostname, use this value as the scheme. ``https`` will work only
591 when using a CDN. Defaults to ``"http"``.
592
593 :raises: :exc:`ValueError` when version is invalid or mutually exclusive arguments are used.
594 :raises: :exc:`TypeError` when expiration is not a valid type.
595 :raises: :exc:`AttributeError` if credentials is not an instance
596 of :class:`google.auth.credentials.Signing`.
597
598 :rtype: str
599 :returns: A signed URL you can use to access the resource
600 until expiration.
601 """
602 if version is None:
603 version = "v2"
604 elif version not in ("v2", "v4"):
605 raise ValueError("'version' must be either 'v2' or 'v4'")
606
607 if (
608 api_access_endpoint is not None or virtual_hosted_style
609 ) and bucket_bound_hostname:
610 raise ValueError(
611 "The bucket_bound_hostname argument is not compatible with "
612 "either api_access_endpoint or virtual_hosted_style."
613 )
614
615 if api_access_endpoint is None:
616 client = self._require_client(client)
617 api_access_endpoint = client.api_endpoint
618
619 quoted_name = _quote(self.name, safe=b"/~")
620
621 # If you are on Google Compute Engine, you can't generate a signed URL
622 # using GCE service account.
623 # See https://github.com/googleapis/google-auth-library-python/issues/50
624 if virtual_hosted_style:
625 api_access_endpoint = _virtual_hosted_style_base_url(
626 api_access_endpoint, self.bucket.name
627 )
628 resource = f"/{quoted_name}"
629 elif bucket_bound_hostname:
630 api_access_endpoint = _bucket_bound_hostname_url(
631 bucket_bound_hostname, scheme
632 )
633 resource = f"/{quoted_name}"
634 else:
635 resource = f"/{self.bucket.name}/{quoted_name}"
636
637 if credentials is None:
638 client = self._require_client(client) # May be redundant, but that's ok.
639 credentials = client._credentials
640
641 client = self._require_client(client)
642 universe_domain = client.universe_domain
643
644 if version == "v2":
645 helper = generate_signed_url_v2
646 else:
647 helper = generate_signed_url_v4
648
649 if self._encryption_key is not None:
650 encryption_headers = _get_encryption_headers(self._encryption_key)
651 if headers is None:
652 headers = {}
653 if version == "v2":
654 # See: https://cloud.google.com/storage/docs/access-control/signed-urls-v2#about-canonical-extension-headers
655 v2_copy_only = "X-Goog-Encryption-Algorithm"
656 headers[v2_copy_only] = encryption_headers[v2_copy_only]
657 else:
658 headers.update(encryption_headers)
659
660 return helper(
661 credentials,
662 resource=resource,
663 expiration=expiration,
664 api_access_endpoint=api_access_endpoint,
665 method=method.upper(),
666 content_md5=content_md5,
667 content_type=content_type,
668 response_type=response_type,
669 response_disposition=response_disposition,
670 generation=generation,
671 headers=headers,
672 query_parameters=query_parameters,
673 service_account_email=service_account_email,
674 access_token=access_token,
675 universe_domain=universe_domain,
676 )
677
678 def exists(
679 self,
680 client=None,
681 if_etag_match=None,
682 if_etag_not_match=None,
683 if_generation_match=None,
684 if_generation_not_match=None,
685 if_metageneration_match=None,
686 if_metageneration_not_match=None,
687 timeout=_DEFAULT_TIMEOUT,
688 retry=DEFAULT_RETRY,
689 soft_deleted=None,
690 ):
691 """Determines whether or not this blob exists.
692
693 If :attr:`user_project` is set on the bucket, bills the API request
694 to that project.
695
696 :type client: :class:`~google.cloud.storage.client.Client`
697 :param client:
698 (Optional) The client to use. If not passed, falls back to the
699 ``client`` stored on the blob's bucket.
700
701 :type if_etag_match: Union[str, Set[str]]
702 :param if_etag_match:
703 (Optional) See :ref:`using-if-etag-match`
704
705 :type if_etag_not_match: Union[str, Set[str]]
706 :param if_etag_not_match:
707 (Optional) See :ref:`using-if-etag-not-match`
708
709 :type if_generation_match: long
710 :param if_generation_match:
711 (Optional) See :ref:`using-if-generation-match`
712
713 :type if_generation_not_match: long
714 :param if_generation_not_match:
715 (Optional) See :ref:`using-if-generation-not-match`
716
717 :type if_metageneration_match: long
718 :param if_metageneration_match:
719 (Optional) See :ref:`using-if-metageneration-match`
720
721 :type if_metageneration_not_match: long
722 :param if_metageneration_not_match:
723 (Optional) See :ref:`using-if-metageneration-not-match`
724
725 :type timeout: float or tuple
726 :param timeout:
727 (Optional) The amount of time, in seconds, to wait
728 for the server response. See: :ref:`configuring_timeouts`
729
730 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
731 :param retry:
732 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
733
734 :type soft_deleted: bool
735 :param soft_deleted:
736 (Optional) If True, looks for a soft-deleted object. Will only return True
737 if the object exists and is in a soft-deleted state.
738 :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
739 See: https://cloud.google.com/storage/docs/soft-delete
740
741 :rtype: bool
742 :returns: True if the blob exists in Cloud Storage.
743 """
744 with create_trace_span(name="Storage.Blob.exists"):
745 client = self._require_client(client)
746 # We only need the status code (200 or not) so we seek to
747 # minimize the returned payload.
748 query_params = self._query_params
749 query_params["fields"] = "name"
750 if soft_deleted is not None:
751 query_params["softDeleted"] = soft_deleted
752
753 _add_generation_match_parameters(
754 query_params,
755 if_generation_match=if_generation_match,
756 if_generation_not_match=if_generation_not_match,
757 if_metageneration_match=if_metageneration_match,
758 if_metageneration_not_match=if_metageneration_not_match,
759 )
760
761 headers = {}
762 _add_etag_match_headers(
763 headers,
764 if_etag_match=if_etag_match,
765 if_etag_not_match=if_etag_not_match,
766 )
767
768 try:
769 # We intentionally pass `_target_object=None` since fields=name
770 # would limit the local properties.
771 client._get_resource(
772 self.path,
773 query_params=query_params,
774 headers=headers,
775 timeout=timeout,
776 retry=retry,
777 _target_object=None,
778 )
779 except NotFound:
780 # NOTE: This will not fail immediately in a batch. However, when
781 # Batch.finish() is called, the resulting `NotFound` will be
782 # raised.
783 return False
784 return True
785
786 def delete(
787 self,
788 client=None,
789 if_generation_match=None,
790 if_generation_not_match=None,
791 if_metageneration_match=None,
792 if_metageneration_not_match=None,
793 timeout=_DEFAULT_TIMEOUT,
794 retry=DEFAULT_RETRY,
795 ):
796 """Deletes a blob from Cloud Storage.
797
798 If :attr:`user_project` is set on the bucket, bills the API request
799 to that project.
800
801 :type client: :class:`~google.cloud.storage.client.Client`
802 :param client:
803 (Optional) The client to use. If not passed, falls back to the
804 ``client`` stored on the blob's bucket.
805
806 :type if_generation_match: long
807 :param if_generation_match:
808 (Optional) See :ref:`using-if-generation-match`
809
810 :type if_generation_not_match: long
811 :param if_generation_not_match:
812 (Optional) See :ref:`using-if-generation-not-match`
813
814 :type if_metageneration_match: long
815 :param if_metageneration_match:
816 (Optional) See :ref:`using-if-metageneration-match`
817
818 :type if_metageneration_not_match: long
819 :param if_metageneration_not_match:
820 (Optional) See :ref:`using-if-metageneration-not-match`
821
822 :type timeout: float or tuple
823 :param timeout:
824 (Optional) The amount of time, in seconds, to wait
825 for the server response. See: :ref:`configuring_timeouts`
826
827 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
828 :param retry: (Optional) How to retry the RPC. A None value will disable
829 retries. A google.api_core.retry.Retry value will enable retries,
830 and the object will define retriable response codes and errors and
831 configure backoff and timeout options.
832
833 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
834 Retry object and activates it only if certain conditions are met.
835 This class exists to provide safe defaults for RPC calls that are
836 not technically safe to retry normally (due to potential data
837 duplication or other side-effects) but become safe to retry if a
838 condition such as if_generation_match is set.
839
840 See the retry.py source code and docstrings in this package
841 (google.cloud.storage.retry) for information on retry types and how
842 to configure them.
843
844 :raises: :class:`google.cloud.exceptions.NotFound`
845 (propagated from
846 :meth:`google.cloud.storage.bucket.Bucket.delete_blob`).
847 """
848 with create_trace_span(name="Storage.Blob.delete"):
849 self.bucket.delete_blob(
850 self.name,
851 client=client,
852 generation=self.generation,
853 timeout=timeout,
854 if_generation_match=if_generation_match,
855 if_generation_not_match=if_generation_not_match,
856 if_metageneration_match=if_metageneration_match,
857 if_metageneration_not_match=if_metageneration_not_match,
858 retry=retry,
859 )
860
861 def _get_transport(self, client):
862 """Return the client's transport.
863
864 :type client: :class:`~google.cloud.storage.client.Client`
865 :param client:
866 (Optional) The client to use. If not passed, falls back to the
867 ``client`` stored on the blob's bucket.
868
869 :rtype transport:
870 :class:`~google.auth.transport.requests.AuthorizedSession`
871 :returns: The transport (with credentials) that will
872 make authenticated requests.
873 """
874 client = self._require_client(client)
875 return client._http
876
877 def _get_download_url(
878 self,
879 client,
880 if_generation_match=None,
881 if_generation_not_match=None,
882 if_metageneration_match=None,
883 if_metageneration_not_match=None,
884 ):
885 """Get the download URL for the current blob.
886
887 If the ``media_link`` has been loaded, it will be used, otherwise
888 the URL will be constructed from the current blob's path (and possibly
889 generation) to avoid a round trip.
890
891 :type client: :class:`~google.cloud.storage.client.Client`
892 :param client: The client to use.
893
894 :type if_generation_match: long
895 :param if_generation_match:
896 (Optional) See :ref:`using-if-generation-match`
897
898 :type if_generation_not_match: long
899 :param if_generation_not_match:
900 (Optional) See :ref:`using-if-generation-not-match`
901
902 :type if_metageneration_match: long
903 :param if_metageneration_match:
904 (Optional) See :ref:`using-if-metageneration-match`
905
906 :type if_metageneration_not_match: long
907 :param if_metageneration_not_match:
908 (Optional) See :ref:`using-if-metageneration-not-match`
909
910 :rtype: str
911 :returns: The download URL for the current blob.
912 """
913 name_value_pairs = []
914 if self.media_link is None:
915 hostname = _get_host_name(client._connection)
916 base_url = _DOWNLOAD_URL_TEMPLATE.format(
917 hostname=hostname, path=self.path, api_version=_API_VERSION
918 )
919 if self.generation is not None:
920 name_value_pairs.append(("generation", f"{self.generation:d}"))
921 else:
922 base_url = self.media_link
923
924 if self.user_project is not None:
925 name_value_pairs.append(("userProject", self.user_project))
926
927 _add_generation_match_parameters(
928 name_value_pairs,
929 if_generation_match=if_generation_match,
930 if_generation_not_match=if_generation_not_match,
931 if_metageneration_match=if_metageneration_match,
932 if_metageneration_not_match=if_metageneration_not_match,
933 )
934 return _add_query_parameters(base_url, name_value_pairs)
935
936 def _extract_headers_from_download(self, response):
937 """Extract headers from a non-chunked request's http object.
938
939 This avoids the need to make a second request for commonly used
940 headers.
941
942 :type response:
943 :class requests.models.Response
944 :param response: The server response from downloading a non-chunked file
945 """
946 self._properties["contentEncoding"] = response.headers.get(
947 "Content-Encoding", None
948 )
949 self._properties[_CONTENT_TYPE_FIELD] = response.headers.get(
950 "Content-Type", None
951 )
952 self._properties["cacheControl"] = response.headers.get("Cache-Control", None)
953 self._properties["storageClass"] = response.headers.get(
954 "X-Goog-Storage-Class", None
955 )
956 self._properties["contentLanguage"] = response.headers.get(
957 "Content-Language", None
958 )
959 self._properties["etag"] = response.headers.get("ETag", None)
960 self._properties["generation"] = response.headers.get("X-goog-generation", None)
961 self._properties["metageneration"] = response.headers.get(
962 "X-goog-metageneration", None
963 )
964 # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
965 x_goog_hash = response.headers.get("X-Goog-Hash", "")
966
967 if x_goog_hash:
968 digests = {}
969 for encoded_digest in x_goog_hash.split(","):
970 match = re.match(r"(crc32c|md5)=([\w\d/\+/]+={0,3})", encoded_digest)
971 if match:
972 method, digest = match.groups()
973 digests[method] = digest
974
975 self._properties["crc32c"] = digests.get("crc32c", None)
976 self._properties["md5Hash"] = digests.get("md5", None)
977
978 def _do_download(
979 self,
980 transport,
981 file_obj,
982 download_url,
983 headers,
984 start=None,
985 end=None,
986 raw_download=False,
987 timeout=_DEFAULT_TIMEOUT,
988 checksum="auto",
989 retry=DEFAULT_RETRY,
990 single_shot_download=False,
991 ):
992 """Perform a download without any error handling.
993
994 This is intended to be called by :meth:`_prep_and_do_download` so it can
995 be wrapped with error handling / remapping.
996
997 :type transport:
998 :class:`~google.auth.transport.requests.AuthorizedSession`
999 :param transport:
1000 The transport (with credentials) that will make authenticated
1001 requests.
1002
1003 :type file_obj: file
1004 :param file_obj: A file handle to which to write the blob's data.
1005
1006 :type download_url: str
1007 :param download_url: The URL where the media can be accessed.
1008
1009 :type headers: dict
1010 :param headers: Headers to be sent with the request(s).
1011
1012 :type start: int
1013 :param start: (Optional) The first byte in a range to be downloaded.
1014
1015 :type end: int
1016 :param end: (Optional) The last byte in a range to be downloaded.
1017
1018 :type raw_download: bool
1019 :param raw_download:
1020 (Optional) If true, download the object without any expansion.
1021
1022 :type timeout: float or tuple
1023 :param timeout:
1024 (Optional) The amount of time, in seconds, to wait
1025 for the server response. See: :ref:`configuring_timeouts`
1026
1027 :type checksum: str
1028 :param checksum:
1029 (Optional) The type of checksum to compute to verify the integrity
1030 of the object. The response headers must contain a checksum of the
1031 requested type. If the headers lack an appropriate checksum (for
1032 instance in the case of transcoded or ranged downloads where the
1033 remote service does not know the correct checksum, including
1034 downloads where chunk_size is set) an INFO-level log will be
1035 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1036 default is "auto", which will try to detect if the C extension for
1037 crc32c is installed and fall back to md5 otherwise.
1038
1039 :type retry: google.api_core.retry.Retry
1040 :param retry: (Optional) How to retry the RPC. A None value will disable
1041 retries. A google.api_core.retry.Retry value will enable retries,
1042 and the object will configure backoff and timeout options.
1043
1044 This private method does not accept ConditionalRetryPolicy values
1045 because the information necessary to evaluate the policy is instead
1046 evaluated in blob._prep_and_do_download().
1047
1048 See the retry.py source code and docstrings in this package
1049 (google.cloud.storage.retry) for information on retry types and how
1050 to configure them.
1051
1052 :type single_shot_download: bool
1053 :param single_shot_download:
1054 (Optional) If true, download the object in a single request.
1055 Caution: Enabling this will increase the memory overload for your application.
1056 Please enable this as per your use case.
1057 """
1058
1059 extra_attributes = {
1060 "url.full": download_url,
1061 "download.chunk_size": f"{self.chunk_size}",
1062 "download.raw_download": raw_download,
1063 "upload.checksum": f"{checksum}",
1064 "download.single_shot_download": single_shot_download,
1065 }
1066 args = {"timeout": timeout}
1067
1068 if self.chunk_size is None:
1069 if raw_download:
1070 klass = RawDownload
1071 download_class = "RawDownload"
1072 else:
1073 klass = Download
1074 download_class = "Download"
1075
1076 download = klass(
1077 download_url,
1078 stream=file_obj,
1079 headers=headers,
1080 start=start,
1081 end=end,
1082 checksum=checksum,
1083 retry=retry,
1084 # NOTE: single_shot_download is only supported in Download and RawDownload
1085 # classes, i.e., when chunk_size is set to None (the default value). It is
1086 # not supported for chunked downloads.
1087 single_shot_download=single_shot_download,
1088 )
1089 with create_trace_span(
1090 name=f"Storage.{download_class}/consume",
1091 attributes=extra_attributes,
1092 api_request=args,
1093 ):
1094 response = download.consume(transport, timeout=timeout)
1095 self._extract_headers_from_download(response)
1096 else:
1097 if checksum:
1098 msg = _CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE.format(checksum)
1099 _logger.info(msg)
1100
1101 if raw_download:
1102 klass = RawChunkedDownload
1103 download_class = "RawChunkedDownload"
1104 else:
1105 klass = ChunkedDownload
1106 download_class = "ChunkedDownload"
1107
1108 download = klass(
1109 download_url,
1110 self.chunk_size,
1111 file_obj,
1112 headers=headers,
1113 start=start if start else 0,
1114 end=end,
1115 retry=retry,
1116 )
1117
1118 with create_trace_span(
1119 name=f"Storage.{download_class}/consumeNextChunk",
1120 attributes=extra_attributes,
1121 api_request=args,
1122 ):
1123 while not download.finished:
1124 download.consume_next_chunk(transport, timeout=timeout)
1125
1126 def download_to_file(
1127 self,
1128 file_obj,
1129 client=None,
1130 start=None,
1131 end=None,
1132 raw_download=False,
1133 if_etag_match=None,
1134 if_etag_not_match=None,
1135 if_generation_match=None,
1136 if_generation_not_match=None,
1137 if_metageneration_match=None,
1138 if_metageneration_not_match=None,
1139 timeout=_DEFAULT_TIMEOUT,
1140 checksum="auto",
1141 retry=DEFAULT_RETRY,
1142 single_shot_download=False,
1143 ):
1144 """Download the contents of this blob into a file-like object.
1145
1146 .. note::
1147
1148 If the server-set property, :attr:`media_link`, is not yet
1149 initialized, makes an additional API request to load it.
1150
1151 If the :attr:`chunk_size` of a current blob is `None`, will download data
1152 in single download request otherwise it will download the :attr:`chunk_size`
1153 of data in each request.
1154
1155 For more fine-grained control over the download process, check out
1156 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
1157 For example, this library allows downloading **parts** of a blob rather than the whole thing.
1158
1159 If :attr:`user_project` is set on the bucket, bills the API request
1160 to that project.
1161
1162 :type file_obj: file
1163 :param file_obj: A file handle to which to write the blob's data.
1164
1165 :type client: :class:`~google.cloud.storage.client.Client`
1166 :param client:
1167 (Optional) The client to use. If not passed, falls back to the
1168 ``client`` stored on the blob's bucket.
1169
1170 :type start: int
1171 :param start: (Optional) The first byte in a range to be downloaded.
1172
1173 :type end: int
1174 :param end: (Optional) The last byte in a range to be downloaded.
1175
1176 :type raw_download: bool
1177 :param raw_download:
1178 (Optional) If true, download the object without any expansion.
1179
1180 :type if_etag_match: Union[str, Set[str]]
1181 :param if_etag_match:
1182 (Optional) See :ref:`using-if-etag-match`
1183
1184 :type if_etag_not_match: Union[str, Set[str]]
1185 :param if_etag_not_match:
1186 (Optional) See :ref:`using-if-etag-not-match`
1187
1188 :type if_generation_match: long
1189 :param if_generation_match:
1190 (Optional) See :ref:`using-if-generation-match`
1191
1192 :type if_generation_not_match: long
1193 :param if_generation_not_match:
1194 (Optional) See :ref:`using-if-generation-not-match`
1195
1196 :type if_metageneration_match: long
1197 :param if_metageneration_match:
1198 (Optional) See :ref:`using-if-metageneration-match`
1199
1200 :type if_metageneration_not_match: long
1201 :param if_metageneration_not_match:
1202 (Optional) See :ref:`using-if-metageneration-not-match`
1203
1204 :type timeout: float or tuple
1205 :param timeout:
1206 (Optional) The amount of time, in seconds, to wait
1207 for the server response. See: :ref:`configuring_timeouts`
1208
1209 :type checksum: str
1210 :param checksum:
1211 (Optional) The type of checksum to compute to verify the integrity
1212 of the object. The response headers must contain a checksum of the
1213 requested type. If the headers lack an appropriate checksum (for
1214 instance in the case of transcoded or ranged downloads where the
1215 remote service does not know the correct checksum, including
1216 downloads where chunk_size is set) an INFO-level log will be
1217 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1218 default is "auto", which will try to detect if the C extension for
1219 crc32c is installed and fall back to md5 otherwise.
1220
1221 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1222 :param retry: (Optional) How to retry the RPC. A None value will disable
1223 retries. A google.api_core.retry.Retry value will enable retries,
1224 and the object will define retriable response codes and errors and
1225 configure backoff and timeout options.
1226
1227 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1228 Retry object and activates it only if certain conditions are met.
1229 This class exists to provide safe defaults for RPC calls that are
1230 not technically safe to retry normally (due to potential data
1231 duplication or other side-effects) but become safe to retry if a
1232 condition such as if_metageneration_match is set.
1233
1234 See the retry.py source code and docstrings in this package
1235 (google.cloud.storage.retry) for information on retry types and how
1236 to configure them.
1237
1238 :type single_shot_download: bool
1239 :param single_shot_download:
1240 (Optional) If true, download the object in a single request.
1241 Caution: Enabling this will increase the memory overload for your application.
1242 Please enable this as per your use case.
1243
1244 :raises: :class:`google.cloud.exceptions.NotFound`
1245 """
1246 with create_trace_span(name="Storage.Blob.downloadToFile"):
1247 self._prep_and_do_download(
1248 file_obj,
1249 client=client,
1250 start=start,
1251 end=end,
1252 raw_download=raw_download,
1253 if_etag_match=if_etag_match,
1254 if_etag_not_match=if_etag_not_match,
1255 if_generation_match=if_generation_match,
1256 if_generation_not_match=if_generation_not_match,
1257 if_metageneration_match=if_metageneration_match,
1258 if_metageneration_not_match=if_metageneration_not_match,
1259 timeout=timeout,
1260 checksum=checksum,
1261 retry=retry,
1262 single_shot_download=single_shot_download,
1263 )
1264
1265 def _handle_filename_and_download(self, filename, *args, **kwargs):
1266 """Download the contents of this blob into a named file.
1267
1268 :type filename: str
1269 :param filename: A filename to be passed to ``open``.
1270
1271 For *args and **kwargs, refer to the documentation for download_to_filename() for more information.
1272 """
1273
1274 try:
1275 with open(filename, "wb") as file_obj:
1276 self._prep_and_do_download(
1277 file_obj,
1278 *args,
1279 **kwargs,
1280 )
1281
1282 except (DataCorruption, NotFound):
1283 # Delete the corrupt or empty downloaded file.
1284 os.remove(filename)
1285 raise
1286
1287 updated = self.updated
1288 if updated is not None:
1289 mtime = updated.timestamp()
1290 os.utime(file_obj.name, (mtime, mtime))
1291
1292 def download_to_filename(
1293 self,
1294 filename,
1295 client=None,
1296 start=None,
1297 end=None,
1298 raw_download=False,
1299 if_etag_match=None,
1300 if_etag_not_match=None,
1301 if_generation_match=None,
1302 if_generation_not_match=None,
1303 if_metageneration_match=None,
1304 if_metageneration_not_match=None,
1305 timeout=_DEFAULT_TIMEOUT,
1306 checksum="auto",
1307 retry=DEFAULT_RETRY,
1308 single_shot_download=False,
1309 ):
1310 """Download the contents of this blob into a named file.
1311
1312 If :attr:`user_project` is set on the bucket, bills the API request
1313 to that project.
1314
1315 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-download-encrypted-file#storage_download_encrypted_file-python)
1316 to download a file with a [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).
1317
1318 :type filename: str
1319 :param filename: A filename to be passed to ``open``.
1320
1321 :type client: :class:`~google.cloud.storage.client.Client`
1322 :param client:
1323 (Optional) The client to use. If not passed, falls back to the
1324 ``client`` stored on the blob's bucket.
1325
1326 :type start: int
1327 :param start: (Optional) The first byte in a range to be downloaded.
1328
1329 :type end: int
1330 :param end: (Optional) The last byte in a range to be downloaded.
1331
1332 :type raw_download: bool
1333 :param raw_download:
1334 (Optional) If true, download the object without any expansion.
1335
1336 :type if_etag_match: Union[str, Set[str]]
1337 :param if_etag_match:
1338 (Optional) See :ref:`using-if-etag-match`
1339
1340 :type if_etag_not_match: Union[str, Set[str]]
1341 :param if_etag_not_match:
1342 (Optional) See :ref:`using-if-etag-not-match`
1343
1344 :type if_generation_match: long
1345 :param if_generation_match:
1346 (Optional) See :ref:`using-if-generation-match`
1347
1348 :type if_generation_not_match: long
1349 :param if_generation_not_match:
1350 (Optional) See :ref:`using-if-generation-not-match`
1351
1352 :type if_metageneration_match: long
1353 :param if_metageneration_match:
1354 (Optional) See :ref:`using-if-metageneration-match`
1355
1356 :type if_metageneration_not_match: long
1357 :param if_metageneration_not_match:
1358 (Optional) See :ref:`using-if-metageneration-not-match`
1359
1360 :type timeout: float or tuple
1361 :param timeout:
1362 (Optional) The amount of time, in seconds, to wait
1363 for the server response. See: :ref:`configuring_timeouts`
1364
1365 :type checksum: str
1366 :param checksum:
1367 (Optional) The type of checksum to compute to verify the integrity
1368 of the object. The response headers must contain a checksum of the
1369 requested type. If the headers lack an appropriate checksum (for
1370 instance in the case of transcoded or ranged downloads where the
1371 remote service does not know the correct checksum, including
1372 downloads where chunk_size is set) an INFO-level log will be
1373 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1374 default is "auto", which will try to detect if the C extension for
1375 crc32c is installed and fall back to md5 otherwise.
1376
1377 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1378 :param retry: (Optional) How to retry the RPC. A None value will disable
1379 retries. A google.api_core.retry.Retry value will enable retries,
1380 and the object will define retriable response codes and errors and
1381 configure backoff and timeout options.
1382
1383 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1384 Retry object and activates it only if certain conditions are met.
1385 This class exists to provide safe defaults for RPC calls that are
1386 not technically safe to retry normally (due to potential data
1387 duplication or other side-effects) but become safe to retry if a
1388 condition such as if_metageneration_match is set.
1389
1390 See the retry.py source code and docstrings in this package
1391 (google.cloud.storage.retry) for information on retry types and how
1392 to configure them.
1393
1394 :type single_shot_download: bool
1395 :param single_shot_download:
1396 (Optional) If true, download the object in a single request.
1397 Caution: Enabling this will increase the memory overload for your application.
1398 Please enable this as per your use case.
1399
1400 :raises: :class:`google.cloud.exceptions.NotFound`
1401 """
1402 with create_trace_span(name="Storage.Blob.downloadToFilename"):
1403 self._handle_filename_and_download(
1404 filename,
1405 client=client,
1406 start=start,
1407 end=end,
1408 raw_download=raw_download,
1409 if_etag_match=if_etag_match,
1410 if_etag_not_match=if_etag_not_match,
1411 if_generation_match=if_generation_match,
1412 if_generation_not_match=if_generation_not_match,
1413 if_metageneration_match=if_metageneration_match,
1414 if_metageneration_not_match=if_metageneration_not_match,
1415 timeout=timeout,
1416 checksum=checksum,
1417 retry=retry,
1418 single_shot_download=single_shot_download,
1419 )
1420
1421 def download_as_bytes(
1422 self,
1423 client=None,
1424 start=None,
1425 end=None,
1426 raw_download=False,
1427 if_etag_match=None,
1428 if_etag_not_match=None,
1429 if_generation_match=None,
1430 if_generation_not_match=None,
1431 if_metageneration_match=None,
1432 if_metageneration_not_match=None,
1433 timeout=_DEFAULT_TIMEOUT,
1434 checksum="auto",
1435 retry=DEFAULT_RETRY,
1436 single_shot_download=False,
1437 ):
1438 """Download the contents of this blob as a bytes object.
1439
1440 If :attr:`user_project` is set on the bucket, bills the API request
1441 to that project.
1442
1443 :type client: :class:`~google.cloud.storage.client.Client`
1444 :param client:
1445 (Optional) The client to use. If not passed, falls back to the
1446 ``client`` stored on the blob's bucket.
1447
1448 :type start: int
1449 :param start: (Optional) The first byte in a range to be downloaded.
1450
1451 :type end: int
1452 :param end: (Optional) The last byte in a range to be downloaded.
1453
1454 :type raw_download: bool
1455 :param raw_download:
1456 (Optional) If true, download the object without any expansion.
1457
1458 :type if_etag_match: Union[str, Set[str]]
1459 :param if_etag_match:
1460 (Optional) See :ref:`using-if-etag-match`
1461
1462 :type if_etag_not_match: Union[str, Set[str]]
1463 :param if_etag_not_match:
1464 (Optional) See :ref:`using-if-etag-not-match`
1465
1466 :type if_generation_match: long
1467 :param if_generation_match:
1468 (Optional) See :ref:`using-if-generation-match`
1469
1470 :type if_generation_not_match: long
1471 :param if_generation_not_match:
1472 (Optional) See :ref:`using-if-generation-not-match`
1473
1474 :type if_metageneration_match: long
1475 :param if_metageneration_match:
1476 (Optional) See :ref:`using-if-metageneration-match`
1477
1478 :type if_metageneration_not_match: long
1479 :param if_metageneration_not_match:
1480 (Optional) See :ref:`using-if-metageneration-not-match`
1481
1482 :type timeout: float or tuple
1483 :param timeout:
1484 (Optional) The amount of time, in seconds, to wait
1485 for the server response. See: :ref:`configuring_timeouts`
1486
1487 :type checksum: str
1488 :param checksum:
1489 (Optional) The type of checksum to compute to verify the integrity
1490 of the object. The response headers must contain a checksum of the
1491 requested type. If the headers lack an appropriate checksum (for
1492 instance in the case of transcoded or ranged downloads where the
1493 remote service does not know the correct checksum, including
1494 downloads where chunk_size is set) an INFO-level log will be
1495 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1496 default is "auto", which will try to detect if the C extension for
1497 crc32c is installed and fall back to md5 otherwise.
1498
1499 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1500 :param retry: (Optional) How to retry the RPC. A None value will disable
1501 retries. A google.api_core.retry.Retry value will enable retries,
1502 and the object will define retriable response codes and errors and
1503 configure backoff and timeout options.
1504
1505 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1506 Retry object and activates it only if certain conditions are met.
1507 This class exists to provide safe defaults for RPC calls that are
1508 not technically safe to retry normally (due to potential data
1509 duplication or other side-effects) but become safe to retry if a
1510 condition such as if_metageneration_match is set.
1511
1512 See the retry.py source code and docstrings in this package
1513 (google.cloud.storage.retry) for information on retry types and how
1514 to configure them.
1515
1516 :type single_shot_download: bool
1517 :param single_shot_download:
1518 (Optional) If true, download the object in a single request.
1519 Caution: Enabling this will increase the memory overload for your application.
1520 Please enable this as per your use case.
1521
1522 :rtype: bytes
1523 :returns: The data stored in this blob.
1524
1525 :raises: :class:`google.cloud.exceptions.NotFound`
1526 """
1527 with create_trace_span(name="Storage.Blob.downloadAsBytes"):
1528 string_buffer = BytesIO()
1529
1530 self._prep_and_do_download(
1531 string_buffer,
1532 client=client,
1533 start=start,
1534 end=end,
1535 raw_download=raw_download,
1536 if_etag_match=if_etag_match,
1537 if_etag_not_match=if_etag_not_match,
1538 if_generation_match=if_generation_match,
1539 if_generation_not_match=if_generation_not_match,
1540 if_metageneration_match=if_metageneration_match,
1541 if_metageneration_not_match=if_metageneration_not_match,
1542 timeout=timeout,
1543 checksum=checksum,
1544 retry=retry,
1545 single_shot_download=single_shot_download,
1546 )
1547 return string_buffer.getvalue()
1548
1549 def download_as_string(
1550 self,
1551 client=None,
1552 start=None,
1553 end=None,
1554 raw_download=False,
1555 if_etag_match=None,
1556 if_etag_not_match=None,
1557 if_generation_match=None,
1558 if_generation_not_match=None,
1559 if_metageneration_match=None,
1560 if_metageneration_not_match=None,
1561 timeout=_DEFAULT_TIMEOUT,
1562 retry=DEFAULT_RETRY,
1563 single_shot_download=False,
1564 ):
1565 """(Deprecated) Download the contents of this blob as a bytes object.
1566
1567 If :attr:`user_project` is set on the bucket, bills the API request
1568 to that project.
1569
1570 .. note::
1571 Deprecated alias for :meth:`download_as_bytes`.
1572
1573 :type client: :class:`~google.cloud.storage.client.Client`
1574 :param client:
1575 (Optional) The client to use. If not passed, falls back to the
1576 ``client`` stored on the blob's bucket.
1577
1578 :type start: int
1579 :param start: (Optional) The first byte in a range to be downloaded.
1580
1581 :type end: int
1582 :param end: (Optional) The last byte in a range to be downloaded.
1583
1584 :type raw_download: bool
1585 :param raw_download:
1586 (Optional) If true, download the object without any expansion.
1587
1588 :type if_etag_match: Union[str, Set[str]]
1589 :param if_etag_match:
1590 (Optional) See :ref:`using-if-etag-match`
1591
1592 :type if_etag_not_match: Union[str, Set[str]]
1593 :param if_etag_not_match:
1594 (Optional) See :ref:`using-if-etag-not-match`
1595
1596 :type if_generation_match: long
1597 :param if_generation_match:
1598 (Optional) See :ref:`using-if-generation-match`
1599
1600 :type if_generation_not_match: long
1601 :param if_generation_not_match:
1602 (Optional) See :ref:`using-if-generation-not-match`
1603
1604 :type if_metageneration_match: long
1605 :param if_metageneration_match:
1606 (Optional) See :ref:`using-if-metageneration-match`
1607
1608 :type if_metageneration_not_match: long
1609 :param if_metageneration_not_match:
1610 (Optional) See :ref:`using-if-metageneration-not-match`
1611
1612 :type timeout: float or tuple
1613 :param timeout:
1614 (Optional) The amount of time, in seconds, to wait
1615 for the server response. See: :ref:`configuring_timeouts`
1616
1617 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1618 :param retry: (Optional) How to retry the RPC. A None value will disable
1619 retries. A google.api_core.retry.Retry value will enable retries,
1620 and the object will define retriable response codes and errors and
1621 configure backoff and timeout options.
1622
1623 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1624 Retry object and activates it only if certain conditions are met.
1625 This class exists to provide safe defaults for RPC calls that are
1626 not technically safe to retry normally (due to potential data
1627 duplication or other side-effects) but become safe to retry if a
1628 condition such as if_metageneration_match is set.
1629
1630 See the retry.py source code and docstrings in this package
1631 (google.cloud.storage.retry) for information on retry types and how
1632 to configure them.
1633
1634 :type single_shot_download: bool
1635 :param single_shot_download:
1636 (Optional) If true, download the object in a single request.
1637 Caution: Enabling this will increase the memory overload for your application.
1638 Please enable this as per your use case.
1639
1640 :rtype: bytes
1641 :returns: The data stored in this blob.
1642
1643 :raises: :class:`google.cloud.exceptions.NotFound`
1644 """
1645 warnings.warn(
1646 _DOWNLOAD_AS_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2
1647 )
1648 with create_trace_span(name="Storage.Blob.downloadAsString"):
1649 return self.download_as_bytes(
1650 client=client,
1651 start=start,
1652 end=end,
1653 raw_download=raw_download,
1654 if_etag_match=if_etag_match,
1655 if_etag_not_match=if_etag_not_match,
1656 if_generation_match=if_generation_match,
1657 if_generation_not_match=if_generation_not_match,
1658 if_metageneration_match=if_metageneration_match,
1659 if_metageneration_not_match=if_metageneration_not_match,
1660 timeout=timeout,
1661 retry=retry,
1662 single_shot_download=single_shot_download,
1663 )
1664
1665 def download_as_text(
1666 self,
1667 client=None,
1668 start=None,
1669 end=None,
1670 raw_download=False,
1671 encoding=None,
1672 if_etag_match=None,
1673 if_etag_not_match=None,
1674 if_generation_match=None,
1675 if_generation_not_match=None,
1676 if_metageneration_match=None,
1677 if_metageneration_not_match=None,
1678 timeout=_DEFAULT_TIMEOUT,
1679 retry=DEFAULT_RETRY,
1680 single_shot_download=False,
1681 ):
1682 """Download the contents of this blob as text (*not* bytes).
1683
1684 If :attr:`user_project` is set on the bucket, bills the API request
1685 to that project.
1686
1687 :type client: :class:`~google.cloud.storage.client.Client`
1688 :param client:
1689 (Optional) The client to use. If not passed, falls back to the
1690 ``client`` stored on the blob's bucket.
1691
1692 :type start: int
1693 :param start: (Optional) The first byte in a range to be downloaded.
1694
1695 :type end: int
1696 :param end: (Optional) The last byte in a range to be downloaded.
1697
1698 :type raw_download: bool
1699 :param raw_download:
1700 (Optional) If true, download the object without any expansion.
1701
1702 :type encoding: str
1703 :param encoding: (Optional) encoding to be used to decode the
1704 downloaded bytes. Defaults to the ``charset`` param of
1705 attr:`content_type`, or else to "utf-8".
1706
1707 :type if_etag_match: Union[str, Set[str]]
1708 :param if_etag_match:
1709 (Optional) See :ref:`using-if-etag-match`
1710
1711 :type if_etag_not_match: Union[str, Set[str]]
1712 :param if_etag_not_match:
1713 (Optional) See :ref:`using-if-etag-not-match`
1714
1715 :type if_generation_match: long
1716 :param if_generation_match:
1717 (Optional) See :ref:`using-if-generation-match`
1718
1719 :type if_generation_not_match: long
1720 :param if_generation_not_match:
1721 (Optional) See :ref:`using-if-generation-not-match`
1722
1723 :type if_metageneration_match: long
1724 :param if_metageneration_match:
1725 (Optional) See :ref:`using-if-metageneration-match`
1726
1727 :type if_metageneration_not_match: long
1728 :param if_metageneration_not_match:
1729 (Optional) See :ref:`using-if-metageneration-not-match`
1730
1731 :type timeout: float or tuple
1732 :param timeout:
1733 (Optional) The amount of time, in seconds, to wait
1734 for the server response. See: :ref:`configuring_timeouts`
1735
1736 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1737 :param retry: (Optional) How to retry the RPC. A None value will disable
1738 retries. A google.api_core.retry.Retry value will enable retries,
1739 and the object will define retriable response codes and errors and
1740 configure backoff and timeout options.
1741
1742 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1743 Retry object and activates it only if certain conditions are met.
1744 This class exists to provide safe defaults for RPC calls that are
1745 not technically safe to retry normally (due to potential data
1746 duplication or other side-effects) but become safe to retry if a
1747 condition such as if_metageneration_match is set.
1748
1749 See the retry.py source code and docstrings in this package
1750 (google.cloud.storage.retry) for information on retry types and how
1751 to configure them.
1752
1753 :type single_shot_download: bool
1754 :param single_shot_download:
1755 (Optional) If true, download the object in a single request.
1756 Caution: Enabling this will increase the memory overload for your application.
1757 Please enable this as per your use case.
1758
1759 :rtype: text
1760 :returns: The data stored in this blob, decoded to text.
1761 """
1762 with create_trace_span(name="Storage.Blob.downloadAsText"):
1763 data = self.download_as_bytes(
1764 client=client,
1765 start=start,
1766 end=end,
1767 raw_download=raw_download,
1768 if_etag_match=if_etag_match,
1769 if_etag_not_match=if_etag_not_match,
1770 if_generation_match=if_generation_match,
1771 if_generation_not_match=if_generation_not_match,
1772 if_metageneration_match=if_metageneration_match,
1773 if_metageneration_not_match=if_metageneration_not_match,
1774 timeout=timeout,
1775 retry=retry,
1776 single_shot_download=single_shot_download,
1777 )
1778
1779 if encoding is not None:
1780 return data.decode(encoding)
1781
1782 if self.content_type is not None:
1783 msg = HeaderParser().parsestr("Content-Type: " + self.content_type)
1784 params = dict(msg.get_params()[1:])
1785 if "charset" in params:
1786 return data.decode(params["charset"])
1787
1788 return data.decode("utf-8")
1789
1790 def _get_content_type(self, content_type, filename=None):
1791 """Determine the content type from the current object.
1792
1793 The return value will be determined in order of precedence:
1794
1795 - The value passed in to this method (if not :data:`None`)
1796 - The value stored on the current blob
1797 - The default value ('application/octet-stream')
1798
1799 :type content_type: str
1800 :param content_type: (Optional) Type of content.
1801
1802 :type filename: str
1803 :param filename:
1804 (Optional) The name of the file where the content is stored.
1805
1806 :rtype: str
1807 :returns: Type of content gathered from the object.
1808 """
1809 if content_type is None:
1810 content_type = self.content_type
1811
1812 if content_type is None and filename is not None:
1813 content_type, _ = mimetypes.guess_type(filename)
1814
1815 if content_type is None:
1816 content_type = _DEFAULT_CONTENT_TYPE
1817
1818 return content_type
1819
1820 def _get_writable_metadata(self):
1821 """Get the object / blob metadata which is writable.
1822
1823 This is intended to be used when creating a new object / blob.
1824
1825 See the [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects)
1826 for more information, the fields marked as writable are:
1827
1828 * ``acl``
1829 * ``cacheControl``
1830 * ``contentDisposition``
1831 * ``contentEncoding``
1832 * ``contentLanguage``
1833 * ``contentType``
1834 * ``crc32c``
1835 * ``customTime``
1836 * ``md5Hash``
1837 * ``metadata``
1838 * ``name``
1839 * ``retention``
1840 * ``storageClass``
1841
1842 For now, we don't support ``acl``, access control lists should be
1843 managed directly through :class:`ObjectACL` methods.
1844 """
1845 # NOTE: This assumes `self.name` is unicode.
1846 object_metadata = {"name": self.name}
1847 for key in self._changes:
1848 if key in _WRITABLE_FIELDS:
1849 object_metadata[key] = self._properties[key]
1850
1851 return object_metadata
1852
1853 def _get_upload_arguments(self, client, content_type, filename=None, command=None):
1854 """Get required arguments for performing an upload.
1855
1856 The content type returned will be determined in order of precedence:
1857
1858 - The value passed in to this method (if not :data:`None`)
1859 - The value stored on the current blob
1860 - The default value ('application/octet-stream')
1861
1862 :type content_type: str
1863 :param content_type: Type of content being uploaded (or :data:`None`).
1864
1865 :type command: str
1866 :param command:
1867 (Optional) Information about which interface for upload was used,
1868 to be included in the X-Goog-API-Client header. Please leave as None
1869 unless otherwise directed.
1870
1871 :rtype: tuple
1872 :returns: A triple of
1873
1874 * A header dictionary
1875 * An object metadata dictionary
1876 * The ``content_type`` as a string (according to precedence)
1877 """
1878 content_type = self._get_content_type(content_type, filename=filename)
1879 # Add any client attached custom headers to the upload headers.
1880 headers = {
1881 **_get_default_headers(
1882 client._connection.user_agent, content_type, command=command
1883 ),
1884 **_get_encryption_headers(self._encryption_key),
1885 **client._extra_headers,
1886 }
1887 object_metadata = self._get_writable_metadata()
1888 return headers, object_metadata, content_type
1889
1890 def _do_multipart_upload(
1891 self,
1892 client,
1893 stream,
1894 content_type,
1895 size,
1896 predefined_acl,
1897 if_generation_match,
1898 if_generation_not_match,
1899 if_metageneration_match,
1900 if_metageneration_not_match,
1901 timeout=_DEFAULT_TIMEOUT,
1902 checksum="auto",
1903 retry=None,
1904 command=None,
1905 ):
1906 """Perform a multipart upload.
1907
1908 The content type of the upload will be determined in order
1909 of precedence:
1910
1911 - The value passed in to this method (if not :data:`None`)
1912 - The value stored on the current blob
1913 - The default value ('application/octet-stream')
1914
1915 :type client: :class:`~google.cloud.storage.client.Client`
1916 :param client:
1917 (Optional) The client to use. If not passed, falls back to the
1918 ``client`` stored on the blob's bucket.
1919
1920 :type stream: IO[bytes]
1921 :param stream: A bytes IO object open for reading.
1922
1923 :type content_type: str
1924 :param content_type: Type of content being uploaded (or :data:`None`).
1925
1926 :type size: int
1927 :param size:
1928 The number of bytes to be uploaded (which will be read from
1929 ``stream``). If not provided, the upload will be concluded once
1930 ``stream`` is exhausted (or :data:`None`).
1931
1932 :type predefined_acl: str
1933 :param predefined_acl: (Optional) Predefined access control list
1934
1935 :type if_generation_match: long
1936 :param if_generation_match:
1937 (Optional) See :ref:`using-if-generation-match`
1938
1939 :type if_generation_not_match: long
1940 :param if_generation_not_match:
1941 (Optional) See :ref:`using-if-generation-not-match`
1942
1943 :type if_metageneration_match: long
1944 :param if_metageneration_match:
1945 (Optional) See :ref:`using-if-metageneration-match`
1946
1947 :type if_metageneration_not_match: long
1948 :param if_metageneration_not_match:
1949 (Optional) See :ref:`using-if-metageneration-not-match`
1950
1951 :type timeout: float or tuple
1952 :param timeout:
1953 (Optional) The amount of time, in seconds, to wait
1954 for the server response. See: :ref:`configuring_timeouts`
1955
1956 :type checksum: str
1957 :param checksum:
1958 (Optional) The type of checksum to compute to verify
1959 the integrity of the object. The request metadata will be amended
1960 to include the computed value. Using this option will override a
1961 manually-set checksum value. Supported values are "md5", "crc32c",
1962 "auto" and None. The default is "auto", which will try to detect if
1963 the C extension for crc32c is installed and fall back to md5
1964 otherwise.
1965 :type retry: google.api_core.retry.Retry
1966 :param retry: (Optional) How to retry the RPC. A None value will disable
1967 retries. A google.api_core.retry.Retry value will enable retries,
1968 and the object will configure backoff and timeout options.
1969
1970 This private method does not accept ConditionalRetryPolicy values
1971 because the information necessary to evaluate the policy is instead
1972 evaluated in blob._do_upload().
1973
1974 See the retry.py source code and docstrings in this package
1975 (google.cloud.storage.retry) for information on retry types and how
1976 to configure them.
1977
1978 :type command: str
1979 :param command:
1980 (Optional) Information about which interface for upload was used,
1981 to be included in the X-Goog-API-Client header. Please leave as None
1982 unless otherwise directed.
1983
1984 :rtype: :class:`~requests.Response`
1985 :returns: The "200 OK" response object returned after the multipart
1986 upload request.
1987 :raises: :exc:`ValueError` if ``size`` is not :data:`None` but the
1988 ``stream`` has fewer than ``size`` bytes remaining.
1989 """
1990 if size is None:
1991 data = stream.read()
1992 else:
1993 data = stream.read(size)
1994 if len(data) < size:
1995 msg = _READ_LESS_THAN_SIZE.format(size, len(data))
1996 raise ValueError(msg)
1997
1998 client = self._require_client(client)
1999 transport = self._get_transport(client)
2000 if "metadata" in self._properties and "metadata" not in self._changes:
2001 self._changes.add("metadata")
2002 info = self._get_upload_arguments(client, content_type, command=command)
2003 headers, object_metadata, content_type = info
2004
2005 hostname = _get_host_name(client._connection)
2006 base_url = _MULTIPART_URL_TEMPLATE.format(
2007 hostname=hostname, bucket_path=self.bucket.path, api_version=_API_VERSION
2008 )
2009 name_value_pairs = []
2010
2011 if self.user_project is not None:
2012 name_value_pairs.append(("userProject", self.user_project))
2013
2014 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
2015 # at rest, object resource metadata will store the version of the Key Management
2016 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
2017 # used to upload a new version of the object then the existing kmsKeyName version
2018 # value can't be used in the upload request and the client instead ignores it.
2019 if (
2020 self.kms_key_name is not None
2021 and "cryptoKeyVersions" not in self.kms_key_name
2022 ):
2023 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
2024
2025 if predefined_acl is not None:
2026 name_value_pairs.append(("predefinedAcl", predefined_acl))
2027
2028 if if_generation_match is not None:
2029 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
2030
2031 if if_generation_not_match is not None:
2032 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
2033
2034 if if_metageneration_match is not None:
2035 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
2036
2037 if if_metageneration_not_match is not None:
2038 name_value_pairs.append(
2039 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
2040 )
2041
2042 upload_url = _add_query_parameters(base_url, name_value_pairs)
2043 upload = MultipartUpload(
2044 upload_url, headers=headers, checksum=checksum, retry=retry
2045 )
2046
2047 extra_attributes = {
2048 "url.full": upload_url,
2049 "upload.checksum": f"{checksum}",
2050 }
2051 args = {"timeout": timeout}
2052 with create_trace_span(
2053 name="Storage.MultipartUpload/transmit",
2054 attributes=extra_attributes,
2055 client=client,
2056 api_request=args,
2057 ):
2058 response = upload.transmit(
2059 transport, data, object_metadata, content_type, timeout=timeout
2060 )
2061
2062 return response
2063
2064 def _initiate_resumable_upload(
2065 self,
2066 client,
2067 stream,
2068 content_type,
2069 size,
2070 predefined_acl=None,
2071 extra_headers=None,
2072 chunk_size=None,
2073 if_generation_match=None,
2074 if_generation_not_match=None,
2075 if_metageneration_match=None,
2076 if_metageneration_not_match=None,
2077 timeout=_DEFAULT_TIMEOUT,
2078 checksum="auto",
2079 retry=None,
2080 command=None,
2081 ):
2082 """Initiate a resumable upload.
2083
2084 The content type of the upload will be determined in order
2085 of precedence:
2086
2087 - The value passed in to this method (if not :data:`None`)
2088 - The value stored on the current blob
2089 - The default value ('application/octet-stream')
2090
2091 :type client: :class:`~google.cloud.storage.client.Client`
2092 :param client:
2093 (Optional) The client to use. If not passed, falls back to the
2094 ``client`` stored on the blob's bucket.
2095
2096 :type stream: IO[bytes]
2097 :param stream: A bytes IO object open for reading.
2098
2099 :type content_type: str
2100 :param content_type: Type of content being uploaded (or :data:`None`).
2101
2102 :type size: int
2103 :param size:
2104 The number of bytes to be uploaded (which will be read from
2105 ``stream``). If not provided, the upload will be concluded once
2106 ``stream`` is exhausted (or :data:`None`).
2107
2108 :type predefined_acl: str
2109 :param predefined_acl: (Optional) Predefined access control list
2110
2111 :type extra_headers: dict
2112 :param extra_headers:
2113 (Optional) Extra headers to add to standard headers.
2114
2115 :type chunk_size: int
2116 :param chunk_size:
2117 (Optional) Chunk size to use when creating a
2118 :class:`~google.cloud.storage._media.requests.ResumableUpload`.
2119 If not passed, will fall back to the chunk size on the
2120 current blob, if the chunk size of a current blob is also
2121 `None`, will set the default value.
2122 The default value of ``chunk_size`` is 100 MB.
2123
2124 :type if_generation_match: long
2125 :param if_generation_match:
2126 (Optional) See :ref:`using-if-generation-match`
2127
2128 :type if_generation_not_match: long
2129 :param if_generation_not_match:
2130 (Optional) See :ref:`using-if-generation-not-match`
2131
2132 :type if_metageneration_match: long
2133 :param if_metageneration_match:
2134 (Optional) See :ref:`using-if-metageneration-match`
2135
2136 :type if_metageneration_not_match: long
2137 :param if_metageneration_not_match:
2138 (Optional) See :ref:`using-if-metageneration-not-match`
2139
2140 :type timeout: float or tuple
2141 :param timeout:
2142 (Optional) The amount of time, in seconds, to wait
2143 for the server response. See: :ref:`configuring_timeouts`
2144
2145 :type checksum: str
2146 :param checksum:
2147 (Optional) The type of checksum to compute to verify
2148 the integrity of the object. After the upload is complete, the
2149 server-computed checksum of the resulting object will be checked
2150 and google.cloud.storage.exceptions.DataCorruption will be raised on
2151 a mismatch. On a validation failure, the client will attempt to
2152 delete the uploaded object automatically. Supported values are
2153 "md5", "crc32c", "auto" and None. The default is "auto", which will
2154 try to detect if the C extension for crc32c is installed and fall
2155 back to md5 otherwise.
2156
2157 :type retry: google.api_core.retry.Retry
2158 :param retry: (Optional) How to retry the RPC. A None value will disable
2159 retries. A google.api_core.retry.Retry value will enable retries,
2160 and the object will configure backoff and timeout options.
2161
2162 This private method does not accept ConditionalRetryPolicy values
2163 because the information necessary to evaluate the policy is instead
2164 evaluated in blob._do_upload().
2165
2166 See the retry.py source code and docstrings in this package
2167 (google.cloud.storage.retry) for information on retry types and how
2168 to configure them.
2169
2170 :type command: str
2171 :param command:
2172 (Optional) Information about which interface for upload was used,
2173 to be included in the X-Goog-API-Client header. Please leave as None
2174 unless otherwise directed.
2175
2176 :rtype: tuple
2177 :returns:
2178 Pair of
2179
2180 * The :class:`~google.cloud.storage._media.requests.ResumableUpload`
2181 that was created
2182 * The ``transport`` used to initiate the upload.
2183 """
2184 client = self._require_client(client)
2185 if chunk_size is None:
2186 chunk_size = self.chunk_size
2187 if chunk_size is None:
2188 chunk_size = _DEFAULT_CHUNKSIZE
2189
2190 transport = self._get_transport(client)
2191 if "metadata" in self._properties and "metadata" not in self._changes:
2192 self._changes.add("metadata")
2193 info = self._get_upload_arguments(client, content_type, command=command)
2194 headers, object_metadata, content_type = info
2195 if extra_headers is not None:
2196 headers.update(extra_headers)
2197
2198 hostname = _get_host_name(client._connection)
2199 base_url = _RESUMABLE_URL_TEMPLATE.format(
2200 hostname=hostname, bucket_path=self.bucket.path, api_version=_API_VERSION
2201 )
2202 name_value_pairs = []
2203
2204 if self.user_project is not None:
2205 name_value_pairs.append(("userProject", self.user_project))
2206
2207 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
2208 # at rest, object resource metadata will store the version of the Key Management
2209 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
2210 # used to upload a new version of the object then the existing kmsKeyName version
2211 # value can't be used in the upload request and the client instead ignores it.
2212 if (
2213 self.kms_key_name is not None
2214 and "cryptoKeyVersions" not in self.kms_key_name
2215 ):
2216 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
2217
2218 if predefined_acl is not None:
2219 name_value_pairs.append(("predefinedAcl", predefined_acl))
2220
2221 if if_generation_match is not None:
2222 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
2223
2224 if if_generation_not_match is not None:
2225 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
2226
2227 if if_metageneration_match is not None:
2228 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
2229
2230 if if_metageneration_not_match is not None:
2231 name_value_pairs.append(
2232 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
2233 )
2234
2235 upload_url = _add_query_parameters(base_url, name_value_pairs)
2236 upload = ResumableUpload(
2237 upload_url, chunk_size, headers=headers, checksum=checksum, retry=retry
2238 )
2239
2240 upload.initiate(
2241 transport,
2242 stream,
2243 object_metadata,
2244 content_type,
2245 total_bytes=size,
2246 stream_final=False,
2247 timeout=timeout,
2248 )
2249
2250 return upload, transport
2251
2252 def _do_resumable_upload(
2253 self,
2254 client,
2255 stream,
2256 content_type,
2257 size,
2258 predefined_acl,
2259 if_generation_match,
2260 if_generation_not_match,
2261 if_metageneration_match,
2262 if_metageneration_not_match,
2263 timeout=_DEFAULT_TIMEOUT,
2264 checksum="auto",
2265 retry=None,
2266 command=None,
2267 ):
2268 """Perform a resumable upload.
2269
2270 Assumes ``chunk_size`` is not :data:`None` on the current blob.
2271 The default value of ``chunk_size`` is 100 MB.
2272
2273 The content type of the upload will be determined in order
2274 of precedence:
2275
2276 - The value passed in to this method (if not :data:`None`)
2277 - The value stored on the current blob
2278 - The default value ('application/octet-stream')
2279
2280 :type client: :class:`~google.cloud.storage.client.Client`
2281 :param client:
2282 (Optional) The client to use. If not passed, falls back to the
2283 ``client`` stored on the blob's bucket.
2284
2285 :type stream: IO[bytes]
2286 :param stream: A bytes IO object open for reading.
2287
2288 :type content_type: str
2289 :param content_type: Type of content being uploaded (or :data:`None`).
2290
2291 :type size: int
2292 :param size:
2293 The number of bytes to be uploaded (which will be read from
2294 ``stream``). If not provided, the upload will be concluded once
2295 ``stream`` is exhausted (or :data:`None`).
2296
2297 :type predefined_acl: str
2298 :param predefined_acl: (Optional) Predefined access control list
2299
2300 :type if_generation_match: long
2301 :param if_generation_match:
2302 (Optional) See :ref:`using-if-generation-match`
2303
2304 :type if_generation_not_match: long
2305 :param if_generation_not_match:
2306 (Optional) See :ref:`using-if-generation-not-match`
2307
2308 :type if_metageneration_match: long
2309 :param if_metageneration_match:
2310 (Optional) See :ref:`using-if-metageneration-match`
2311
2312 :type if_metageneration_not_match: long
2313 :param if_metageneration_not_match:
2314 (Optional) See :ref:`using-if-metageneration-not-match`
2315
2316 :type timeout: float or tuple
2317 :param timeout:
2318 (Optional) The amount of time, in seconds, to wait
2319 for the server response. See: :ref:`configuring_timeouts`
2320
2321 :type checksum: str
2322 :param checksum:
2323 (Optional) The type of checksum to compute to verify
2324 the integrity of the object. After the upload is complete, the
2325 server-computed checksum of the resulting object will be checked
2326 and google.cloud.storage.exceptions.DataCorruption will be raised on
2327 a mismatch. On a validation failure, the client will attempt to
2328 delete the uploaded object automatically. Supported values are
2329 "md5", "crc32c", "auto" and None. The default is "auto", which will
2330 try to detect if the C extension for crc32c is installed and fall
2331 back to md5 otherwise.
2332
2333 :type retry: google.api_core.retry.Retry
2334 :param retry: (Optional) How to retry the RPC. A None value will disable
2335 retries. A google.api_core.retry.Retry value will enable retries,
2336 and the object will configure backoff and timeout options.
2337
2338 This private method does not accept ConditionalRetryPolicy values
2339 because the information necessary to evaluate the policy is instead
2340 evaluated in blob._do_upload().
2341
2342 See the retry.py source code and docstrings in this package
2343 (google.cloud.storage.retry) for information on retry types and how
2344 to configure them.
2345
2346 :type command: str
2347 :param command:
2348 (Optional) Information about which interface for upload was used,
2349 to be included in the X-Goog-API-Client header. Please leave as None
2350 unless otherwise directed.
2351
2352 :rtype: :class:`~requests.Response`
2353 :returns: The "200 OK" response object returned after the final chunk
2354 is uploaded.
2355 """
2356 upload, transport = self._initiate_resumable_upload(
2357 client,
2358 stream,
2359 content_type,
2360 size,
2361 predefined_acl=predefined_acl,
2362 if_generation_match=if_generation_match,
2363 if_generation_not_match=if_generation_not_match,
2364 if_metageneration_match=if_metageneration_match,
2365 if_metageneration_not_match=if_metageneration_not_match,
2366 timeout=timeout,
2367 checksum=checksum,
2368 retry=retry,
2369 command=command,
2370 )
2371 extra_attributes = {
2372 "url.full": upload.resumable_url,
2373 "upload.chunk_size": upload.chunk_size,
2374 "upload.checksum": f"{checksum}",
2375 }
2376 args = {"timeout": timeout}
2377 with create_trace_span(
2378 name="Storage.ResumableUpload/transmitNextChunk",
2379 attributes=extra_attributes,
2380 client=client,
2381 api_request=args,
2382 ):
2383 while not upload.finished:
2384 try:
2385 response = upload.transmit_next_chunk(transport, timeout=timeout)
2386 except DataCorruption:
2387 # Attempt to delete the corrupted object.
2388 self.delete()
2389 raise
2390 return response
2391
2392 def _do_upload(
2393 self,
2394 client,
2395 stream,
2396 content_type,
2397 size,
2398 predefined_acl,
2399 if_generation_match,
2400 if_generation_not_match,
2401 if_metageneration_match,
2402 if_metageneration_not_match,
2403 timeout=_DEFAULT_TIMEOUT,
2404 checksum="auto",
2405 retry=None,
2406 command=None,
2407 ):
2408 """Determine an upload strategy and then perform the upload.
2409
2410 If the size of the data to be uploaded exceeds 8 MB a resumable media
2411 request will be used, otherwise the content and the metadata will be
2412 uploaded in a single multipart upload request.
2413
2414 The content type of the upload will be determined in order
2415 of precedence:
2416
2417 - The value passed in to this method (if not :data:`None`)
2418 - The value stored on the current blob
2419 - The default value ('application/octet-stream')
2420
2421 :type client: :class:`~google.cloud.storage.client.Client`
2422 :param client:
2423 (Optional) The client to use. If not passed, falls back to the
2424 ``client`` stored on the blob's bucket.
2425
2426 :type stream: IO[bytes]
2427 :param stream: A bytes IO object open for reading.
2428
2429 :type content_type: str
2430 :param content_type: Type of content being uploaded (or :data:`None`).
2431
2432 :type size: int
2433 :param size:
2434 The number of bytes to be uploaded (which will be read from
2435 ``stream``). If not provided, the upload will be concluded once
2436 ``stream`` is exhausted (or :data:`None`).
2437
2438 :type predefined_acl: str
2439 :param predefined_acl: (Optional) Predefined access control list
2440
2441 :type if_generation_match: long
2442 :param if_generation_match:
2443 (Optional) See :ref:`using-if-generation-match`
2444
2445 :type if_generation_not_match: long
2446 :param if_generation_not_match:
2447 (Optional) See :ref:`using-if-generation-not-match`
2448
2449 :type if_metageneration_match: long
2450 :param if_metageneration_match:
2451 (Optional) See :ref:`using-if-metageneration-match`
2452
2453 :type if_metageneration_not_match: long
2454 :param if_metageneration_not_match:
2455 (Optional) See :ref:`using-if-metageneration-not-match`
2456
2457 :type timeout: float or tuple
2458 :param timeout:
2459 (Optional) The amount of time, in seconds, to wait
2460 for the server response. See: :ref:`configuring_timeouts`
2461
2462 :type checksum: str
2463 :param checksum:
2464 (Optional) The type of checksum to compute to verify
2465 the integrity of the object. If the upload is completed in a single
2466 request, the checksum will be entirely precomputed and the remote
2467 server will handle verification and error handling. If the upload
2468 is too large and must be transmitted in multiple requests, the
2469 checksum will be incrementally computed and the client will handle
2470 verification and error handling, raising
2471 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2472 attempting to delete the corrupted file. Supported values are
2473 "md5", "crc32c", "auto" and None. The default is "auto", which will
2474 try to detect if the C extension for crc32c is installed and fall
2475 back to md5 otherwise.
2476
2477 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2478 :param retry: (Optional) How to retry the RPC. A None value will disable
2479 retries. A google.api_core.retry.Retry value will enable retries,
2480 and the object will define retriable response codes and errors and
2481 configure backoff and timeout options.
2482
2483 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2484 Retry object and activates it only if certain conditions are met.
2485 This class exists to provide safe defaults for RPC calls that are
2486 not technically safe to retry normally (due to potential data
2487 duplication or other side-effects) but become safe to retry if a
2488 condition such as if_generation_match is set.
2489
2490 See the retry.py source code and docstrings in this package
2491 (google.cloud.storage.retry) for information on retry types and how
2492 to configure them.
2493
2494 :type command: str
2495 :param command:
2496 (Optional) Information about which interface for upload was used,
2497 to be included in the X-Goog-API-Client header. Please leave as None
2498 unless otherwise directed.
2499
2500 :rtype: dict
2501 :returns: The parsed JSON from the "200 OK" response. This will be the
2502 **only** response in the multipart case and it will be the
2503 **final** response in the resumable case.
2504 """
2505
2506 # Handle ConditionalRetryPolicy.
2507 if isinstance(retry, ConditionalRetryPolicy):
2508 # Conditional retries are designed for non-media calls, which change
2509 # arguments into query_params dictionaries. Media operations work
2510 # differently, so here we make a "fake" query_params to feed to the
2511 # ConditionalRetryPolicy.
2512 query_params = {
2513 "ifGenerationMatch": if_generation_match,
2514 "ifMetagenerationMatch": if_metageneration_match,
2515 }
2516 retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
2517
2518 if size is not None and size <= _MAX_MULTIPART_SIZE:
2519 response = self._do_multipart_upload(
2520 client,
2521 stream,
2522 content_type,
2523 size,
2524 predefined_acl,
2525 if_generation_match,
2526 if_generation_not_match,
2527 if_metageneration_match,
2528 if_metageneration_not_match,
2529 timeout=timeout,
2530 checksum=checksum,
2531 retry=retry,
2532 command=command,
2533 )
2534 else:
2535 response = self._do_resumable_upload(
2536 client,
2537 stream,
2538 content_type,
2539 size,
2540 predefined_acl,
2541 if_generation_match,
2542 if_generation_not_match,
2543 if_metageneration_match,
2544 if_metageneration_not_match,
2545 timeout=timeout,
2546 checksum=checksum,
2547 retry=retry,
2548 command=command,
2549 )
2550
2551 return response.json()
2552
2553 def _prep_and_do_upload(
2554 self,
2555 file_obj,
2556 rewind=False,
2557 size=None,
2558 content_type=None,
2559 client=None,
2560 predefined_acl=None,
2561 if_generation_match=None,
2562 if_generation_not_match=None,
2563 if_metageneration_match=None,
2564 if_metageneration_not_match=None,
2565 timeout=_DEFAULT_TIMEOUT,
2566 checksum="auto",
2567 retry=DEFAULT_RETRY,
2568 command=None,
2569 ):
2570 """Upload the contents of this blob from a file-like object.
2571
2572 The content type of the upload will be determined in order
2573 of precedence:
2574
2575 - The value passed in to this method (if not :data:`None`)
2576 - The value stored on the current blob
2577 - The default value ('application/octet-stream')
2578
2579 .. note::
2580 The effect of uploading to an existing blob depends on the
2581 "versioning" and "lifecycle" policies defined on the blob's
2582 bucket. In the absence of those policies, upload will
2583 overwrite any existing contents.
2584
2585 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2586 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2587 API documents for details.
2588
2589 If the size of the data to be uploaded exceeds 8 MB a resumable media
2590 request will be used, otherwise the content and the metadata will be
2591 uploaded in a single multipart upload request.
2592
2593 For more fine-grained over the upload process, check out
2594 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
2595
2596 If :attr:`user_project` is set on the bucket, bills the API request
2597 to that project.
2598
2599 :type file_obj: file
2600 :param file_obj: A file handle opened in binary mode for reading.
2601
2602 :type rewind: bool
2603 :param rewind:
2604 If True, seek to the beginning of the file handle before writing
2605 the file to Cloud Storage.
2606
2607 :type size: int
2608 :param size:
2609 The number of bytes to be uploaded (which will be read from
2610 ``file_obj``). If not provided, the upload will be concluded once
2611 ``file_obj`` is exhausted.
2612
2613 :type content_type: str
2614 :param content_type: (Optional) Type of content being uploaded.
2615
2616 :type client: :class:`~google.cloud.storage.client.Client`
2617 :param client:
2618 (Optional) The client to use. If not passed, falls back to the
2619 ``client`` stored on the blob's bucket.
2620
2621 :type predefined_acl: str
2622 :param predefined_acl: (Optional) Predefined access control list
2623
2624 :type if_generation_match: long
2625 :param if_generation_match:
2626 (Optional) See :ref:`using-if-generation-match`
2627
2628 :type if_generation_not_match: long
2629 :param if_generation_not_match:
2630 (Optional) See :ref:`using-if-generation-not-match`
2631
2632 :type if_metageneration_match: long
2633 :param if_metageneration_match:
2634 (Optional) See :ref:`using-if-metageneration-match`
2635
2636 :type if_metageneration_not_match: long
2637 :param if_metageneration_not_match:
2638 (Optional) See :ref:`using-if-metageneration-not-match`
2639
2640 :type timeout: float or tuple
2641 :param timeout:
2642 (Optional) The amount of time, in seconds, to wait
2643 for the server response. See: :ref:`configuring_timeouts`
2644
2645 :type checksum: str
2646 :param checksum:
2647 (Optional) The type of checksum to compute to verify
2648 the integrity of the object. If the upload is completed in a single
2649 request, the checksum will be entirely precomputed and the remote
2650 server will handle verification and error handling. If the upload
2651 is too large and must be transmitted in multiple requests, the
2652 checksum will be incrementally computed and the client will handle
2653 verification and error handling, raising
2654 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2655 attempting to delete the corrupted file. Supported values are
2656 "md5", "crc32c", "auto" and None. The default is "auto", which will
2657 try to detect if the C extension for crc32c is installed and fall
2658 back to md5 otherwise.
2659
2660 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2661 :param retry: (Optional) How to retry the RPC. A None value will disable
2662 retries. A google.api_core.retry.Retry value will enable retries,
2663 and the object will define retriable response codes and errors and
2664 configure backoff and timeout options.
2665
2666 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2667 Retry object and activates it only if certain conditions are met.
2668 This class exists to provide safe defaults for RPC calls that are
2669 not technically safe to retry normally (due to potential data
2670 duplication or other side-effects) but become safe to retry if a
2671 condition such as if_generation_match is set.
2672
2673 See the retry.py source code and docstrings in this package
2674 (google.cloud.storage.retry) for information on retry types and how
2675 to configure them.
2676
2677 :type command: str
2678 :param command:
2679 (Optional) Information about which interface for upload was used,
2680 to be included in the X-Goog-API-Client header. Please leave as None
2681 unless otherwise directed.
2682
2683 :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
2684 if the upload response returns an error status.
2685 """
2686 _maybe_rewind(file_obj, rewind=rewind)
2687 predefined_acl = ACL.validate_predefined(predefined_acl)
2688
2689 try:
2690 created_json = self._do_upload(
2691 client,
2692 file_obj,
2693 content_type,
2694 size,
2695 predefined_acl,
2696 if_generation_match,
2697 if_generation_not_match,
2698 if_metageneration_match,
2699 if_metageneration_not_match,
2700 timeout=timeout,
2701 checksum=checksum,
2702 retry=retry,
2703 command=command,
2704 )
2705 self._set_properties(created_json)
2706 except InvalidResponse as exc:
2707 _raise_from_invalid_response(exc)
2708
2709 def upload_from_file(
2710 self,
2711 file_obj,
2712 rewind=False,
2713 size=None,
2714 content_type=None,
2715 client=None,
2716 predefined_acl=None,
2717 if_generation_match=None,
2718 if_generation_not_match=None,
2719 if_metageneration_match=None,
2720 if_metageneration_not_match=None,
2721 timeout=_DEFAULT_TIMEOUT,
2722 checksum="auto",
2723 retry=DEFAULT_RETRY,
2724 ):
2725 """Upload the contents of this blob from a file-like object.
2726
2727 The content type of the upload will be determined in order
2728 of precedence:
2729
2730 - The value passed in to this method (if not :data:`None`)
2731 - The value stored on the current blob
2732 - The default value ('application/octet-stream')
2733
2734 .. note::
2735 The effect of uploading to an existing blob depends on the
2736 "versioning" and "lifecycle" policies defined on the blob's
2737 bucket. In the absence of those policies, upload will
2738 overwrite any existing contents.
2739
2740 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2741 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2742 API documents for details.
2743
2744 If the size of the data to be uploaded exceeds 8 MB a resumable media
2745 request will be used, otherwise the content and the metadata will be
2746 uploaded in a single multipart upload request.
2747
2748 For more fine-grained over the upload process, check out
2749 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
2750
2751 If :attr:`user_project` is set on the bucket, bills the API request
2752 to that project.
2753
2754 :type file_obj: file
2755 :param file_obj: A file handle opened in binary mode for reading.
2756
2757 :type rewind: bool
2758 :param rewind:
2759 If True, seek to the beginning of the file handle before writing
2760 the file to Cloud Storage.
2761
2762 :type size: int
2763 :param size:
2764 The number of bytes to be uploaded (which will be read from
2765 ``file_obj``). If not provided, the upload will be concluded once
2766 ``file_obj`` is exhausted.
2767
2768 :type content_type: str
2769 :param content_type: (Optional) Type of content being uploaded.
2770
2771 :type client: :class:`~google.cloud.storage.client.Client`
2772 :param client:
2773 (Optional) The client to use. If not passed, falls back to the
2774 ``client`` stored on the blob's bucket.
2775
2776 :type predefined_acl: str
2777 :param predefined_acl: (Optional) Predefined access control list
2778
2779 :type if_generation_match: long
2780 :param if_generation_match:
2781 (Optional) See :ref:`using-if-generation-match`
2782
2783 :type if_generation_not_match: long
2784 :param if_generation_not_match:
2785 (Optional) See :ref:`using-if-generation-not-match`
2786
2787 :type if_metageneration_match: long
2788 :param if_metageneration_match:
2789 (Optional) See :ref:`using-if-metageneration-match`
2790
2791 :type if_metageneration_not_match: long
2792 :param if_metageneration_not_match:
2793 (Optional) See :ref:`using-if-metageneration-not-match`
2794
2795 :type timeout: float or tuple
2796 :param timeout:
2797 (Optional) The amount of time, in seconds, to wait
2798 for the server response. See: :ref:`configuring_timeouts`
2799
2800 :type checksum: str
2801 :param checksum:
2802 (Optional) The type of checksum to compute to verify
2803 the integrity of the object. If the upload is completed in a single
2804 request, the checksum will be entirely precomputed and the remote
2805 server will handle verification and error handling. If the upload
2806 is too large and must be transmitted in multiple requests, the
2807 checksum will be incrementally computed and the client will handle
2808 verification and error handling, raising
2809 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2810 attempting to delete the corrupted file. Supported values are
2811 "md5", "crc32c", "auto" and None. The default is "auto", which will
2812 try to detect if the C extension for crc32c is installed and fall
2813 back to md5 otherwise.
2814
2815 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2816 :param retry: (Optional) How to retry the RPC. A None value will disable
2817 retries. A google.api_core.retry.Retry value will enable retries,
2818 and the object will define retriable response codes and errors and
2819 configure backoff and timeout options.
2820
2821 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2822 Retry object and activates it only if certain conditions are met.
2823 This class exists to provide safe defaults for RPC calls that are
2824 not technically safe to retry normally (due to potential data
2825 duplication or other side-effects) but become safe to retry if a
2826 condition such as if_generation_match is set.
2827
2828 See the retry.py source code and docstrings in this package
2829 (google.cloud.storage.retry) for information on retry types and how
2830 to configure them.
2831
2832 :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
2833 if the upload response returns an error status.
2834 """
2835 with create_trace_span(name="Storage.Blob.uploadFromFile"):
2836 self._prep_and_do_upload(
2837 file_obj,
2838 rewind=rewind,
2839 size=size,
2840 content_type=content_type,
2841 client=client,
2842 predefined_acl=predefined_acl,
2843 if_generation_match=if_generation_match,
2844 if_generation_not_match=if_generation_not_match,
2845 if_metageneration_match=if_metageneration_match,
2846 if_metageneration_not_match=if_metageneration_not_match,
2847 timeout=timeout,
2848 checksum=checksum,
2849 retry=retry,
2850 )
2851
2852 def _handle_filename_and_upload(self, filename, content_type=None, *args, **kwargs):
2853 """Upload this blob's contents from the content of a named file.
2854
2855 :type filename: str
2856 :param filename: The path to the file.
2857
2858 :type content_type: str
2859 :param content_type: (Optional) Type of content being uploaded.
2860
2861 For *args and **kwargs, refer to the documentation for upload_from_filename() for more information.
2862 """
2863
2864 content_type = self._get_content_type(content_type, filename=filename)
2865
2866 with open(filename, "rb") as file_obj:
2867 total_bytes = os.fstat(file_obj.fileno()).st_size
2868 self._prep_and_do_upload(
2869 file_obj,
2870 content_type=content_type,
2871 size=total_bytes,
2872 *args,
2873 **kwargs,
2874 )
2875
2876 def upload_from_filename(
2877 self,
2878 filename,
2879 content_type=None,
2880 client=None,
2881 predefined_acl=None,
2882 if_generation_match=None,
2883 if_generation_not_match=None,
2884 if_metageneration_match=None,
2885 if_metageneration_not_match=None,
2886 timeout=_DEFAULT_TIMEOUT,
2887 checksum="auto",
2888 retry=DEFAULT_RETRY,
2889 ):
2890 """Upload this blob's contents from the content of a named file.
2891
2892 The content type of the upload will be determined in order
2893 of precedence:
2894
2895 - The value passed in to this method (if not :data:`None`)
2896 - The value stored on the current blob
2897 - The value given by ``mimetypes.guess_type``
2898 - The default value ('application/octet-stream')
2899
2900 .. note::
2901 The effect of uploading to an existing blob depends on the
2902 "versioning" and "lifecycle" policies defined on the blob's
2903 bucket. In the absence of those policies, upload will
2904 overwrite any existing contents.
2905
2906 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2907 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2908 API documents for details.
2909
2910 If :attr:`user_project` is set on the bucket, bills the API request
2911 to that project.
2912
2913 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-upload-encrypted-file#storage_upload_encrypted_file-python)
2914 to upload a file with a
2915 [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).
2916
2917 :type filename: str
2918 :param filename: The path to the file.
2919
2920 :type content_type: str
2921 :param content_type: (Optional) Type of content being uploaded.
2922
2923 :type client: :class:`~google.cloud.storage.client.Client`
2924 :param client:
2925 (Optional) The client to use. If not passed, falls back to the
2926 ``client`` stored on the blob's bucket.
2927
2928 :type predefined_acl: str
2929 :param predefined_acl: (Optional) Predefined access control list
2930
2931 :type if_generation_match: long
2932 :param if_generation_match:
2933 (Optional) See :ref:`using-if-generation-match`
2934
2935 :type if_generation_not_match: long
2936 :param if_generation_not_match:
2937 (Optional) See :ref:`using-if-generation-not-match`
2938
2939 :type if_metageneration_match: long
2940 :param if_metageneration_match:
2941 (Optional) See :ref:`using-if-metageneration-match`
2942
2943 :type if_metageneration_not_match: long
2944 :param if_metageneration_not_match:
2945 (Optional) See :ref:`using-if-metageneration-not-match`
2946
2947 :type timeout: float or tuple
2948 :param timeout:
2949 (Optional) The amount of time, in seconds, to wait
2950 for the server response. See: :ref:`configuring_timeouts`
2951
2952 :type checksum: str
2953 :param checksum:
2954 (Optional) The type of checksum to compute to verify
2955 the integrity of the object. If the upload is completed in a single
2956 request, the checksum will be entirely precomputed and the remote
2957 server will handle verification and error handling. If the upload
2958 is too large and must be transmitted in multiple requests, the
2959 checksum will be incrementally computed and the client will handle
2960 verification and error handling, raising
2961 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2962 attempting to delete the corrupted file. Supported values are
2963 "md5", "crc32c", "auto" and None. The default is "auto", which will
2964 try to detect if the C extension for crc32c is installed and fall
2965 back to md5 otherwise.
2966
2967 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2968 :param retry: (Optional) How to retry the RPC. A None value will disable
2969 retries. A google.api_core.retry.Retry value will enable retries,
2970 and the object will define retriable response codes and errors and
2971 configure backoff and timeout options.
2972
2973 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2974 Retry object and activates it only if certain conditions are met.
2975 This class exists to provide safe defaults for RPC calls that are
2976 not technically safe to retry normally (due to potential data
2977 duplication or other side-effects) but become safe to retry if a
2978 condition such as if_generation_match is set.
2979
2980 See the retry.py source code and docstrings in this package
2981 (google.cloud.storage.retry) for information on retry types and how
2982 to configure them.
2983 """
2984 with create_trace_span(name="Storage.Blob.uploadFromFilename"):
2985 self._handle_filename_and_upload(
2986 filename,
2987 content_type=content_type,
2988 client=client,
2989 predefined_acl=predefined_acl,
2990 if_generation_match=if_generation_match,
2991 if_generation_not_match=if_generation_not_match,
2992 if_metageneration_match=if_metageneration_match,
2993 if_metageneration_not_match=if_metageneration_not_match,
2994 timeout=timeout,
2995 checksum=checksum,
2996 retry=retry,
2997 )
2998
2999 def upload_from_string(
3000 self,
3001 data,
3002 content_type="text/plain",
3003 client=None,
3004 predefined_acl=None,
3005 if_generation_match=None,
3006 if_generation_not_match=None,
3007 if_metageneration_match=None,
3008 if_metageneration_not_match=None,
3009 timeout=_DEFAULT_TIMEOUT,
3010 checksum="auto",
3011 retry=DEFAULT_RETRY,
3012 ):
3013 """Upload contents of this blob from the provided string.
3014
3015 .. note::
3016 The effect of uploading to an existing blob depends on the
3017 "versioning" and "lifecycle" policies defined on the blob's
3018 bucket. In the absence of those policies, upload will
3019 overwrite any existing contents.
3020
3021 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3022 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3023 API documents for details.
3024
3025 If :attr:`user_project` is set on the bucket, bills the API request
3026 to that project.
3027
3028 :type data: bytes or str
3029 :param data:
3030 The data to store in this blob. If the value is text, it will be
3031 encoded as UTF-8.
3032
3033 :type content_type: str
3034 :param content_type:
3035 (Optional) Type of content being uploaded. Defaults to
3036 ``'text/plain'``.
3037
3038 :type client: :class:`~google.cloud.storage.client.Client`
3039 :param client:
3040 (Optional) The client to use. If not passed, falls back to the
3041 ``client`` stored on the blob's bucket.
3042
3043 :type predefined_acl: str
3044 :param predefined_acl: (Optional) Predefined access control list
3045
3046 :type if_generation_match: long
3047 :param if_generation_match:
3048 (Optional) See :ref:`using-if-generation-match`
3049
3050 :type if_generation_not_match: long
3051 :param if_generation_not_match:
3052 (Optional) See :ref:`using-if-generation-not-match`
3053
3054 :type if_metageneration_match: long
3055 :param if_metageneration_match:
3056 (Optional) See :ref:`using-if-metageneration-match`
3057
3058 :type if_metageneration_not_match: long
3059 :param if_metageneration_not_match:
3060 (Optional) See :ref:`using-if-metageneration-not-match`
3061
3062 :type timeout: float or tuple
3063 :param timeout:
3064 (Optional) The amount of time, in seconds, to wait
3065 for the server response. See: :ref:`configuring_timeouts`
3066
3067 :type checksum: str
3068 :param checksum:
3069 (Optional) The type of checksum to compute to verify
3070 the integrity of the object. If the upload is completed in a single
3071 request, the checksum will be entirely precomputed and the remote
3072 server will handle verification and error handling. If the upload
3073 is too large and must be transmitted in multiple requests, the
3074 checksum will be incrementally computed and the client will handle
3075 verification and error handling, raising
3076 google.cloud.storage.exceptions.DataCorruption on a mismatch and
3077 attempting to delete the corrupted file. Supported values are
3078 "md5", "crc32c", "auto" and None. The default is "auto", which will
3079 try to detect if the C extension for crc32c is installed and fall
3080 back to md5 otherwise.
3081
3082 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3083 :param retry: (Optional) How to retry the RPC. A None value will disable
3084 retries. A google.api_core.retry.Retry value will enable retries,
3085 and the object will define retriable response codes and errors and
3086 configure backoff and timeout options.
3087
3088 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3089 Retry object and activates it only if certain conditions are met.
3090 This class exists to provide safe defaults for RPC calls that are
3091 not technically safe to retry normally (due to potential data
3092 duplication or other side-effects) but become safe to retry if a
3093 condition such as if_generation_match is set.
3094
3095 See the retry.py source code and docstrings in this package
3096 (google.cloud.storage.retry) for information on retry types and how
3097 to configure them.
3098 """
3099 with create_trace_span(name="Storage.Blob.uploadFromString"):
3100 data = _to_bytes(data, encoding="utf-8")
3101 string_buffer = BytesIO(data)
3102 self.upload_from_file(
3103 file_obj=string_buffer,
3104 size=len(data),
3105 content_type=content_type,
3106 client=client,
3107 predefined_acl=predefined_acl,
3108 if_generation_match=if_generation_match,
3109 if_generation_not_match=if_generation_not_match,
3110 if_metageneration_match=if_metageneration_match,
3111 if_metageneration_not_match=if_metageneration_not_match,
3112 timeout=timeout,
3113 checksum=checksum,
3114 retry=retry,
3115 )
3116
3117 def create_resumable_upload_session(
3118 self,
3119 content_type=None,
3120 size=None,
3121 origin=None,
3122 client=None,
3123 timeout=_DEFAULT_TIMEOUT,
3124 checksum="auto",
3125 predefined_acl=None,
3126 if_generation_match=None,
3127 if_generation_not_match=None,
3128 if_metageneration_match=None,
3129 if_metageneration_not_match=None,
3130 retry=DEFAULT_RETRY,
3131 ):
3132 """Create a resumable upload session.
3133
3134 Resumable upload sessions allow you to start an upload session from
3135 one client and complete the session in another. This method is called
3136 by the initiator to set the metadata and limits. The initiator then
3137 passes the session URL to the client that will upload the binary data.
3138 The client performs a PUT request on the session URL to complete the
3139 upload. This process allows untrusted clients to upload to an
3140 access-controlled bucket.
3141
3142 For more details, see the
3143 documentation on [`signed URLs`](https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable).
3144
3145 The content type of the upload will be determined in order
3146 of precedence:
3147
3148 - The value passed in to this method (if not :data:`None`)
3149 - The value stored on the current blob
3150 - The default value ('application/octet-stream')
3151
3152 .. note::
3153 The effect of uploading to an existing blob depends on the
3154 "versioning" and "lifecycle" policies defined on the blob's
3155 bucket. In the absence of those policies, upload will
3156 overwrite any existing contents.
3157
3158 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3159 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3160 API documents for details.
3161
3162 If :attr:`encryption_key` is set, the blob will be encrypted with
3163 a [`customer-supplied`](https://cloud.google.com/storage/docs/encryption#customer-supplied)
3164 encryption key.
3165
3166 If :attr:`user_project` is set on the bucket, bills the API request
3167 to that project.
3168
3169 :type size: int
3170 :param size:
3171 (Optional) The maximum number of bytes that can be uploaded using
3172 this session. If the size is not known when creating the session,
3173 this should be left blank.
3174
3175 :type content_type: str
3176 :param content_type: (Optional) Type of content being uploaded.
3177
3178 :type origin: str
3179 :param origin:
3180 (Optional) If set, the upload can only be completed by a user-agent
3181 that uploads from the given origin. This can be useful when passing
3182 the session to a web client.
3183
3184 :type client: :class:`~google.cloud.storage.client.Client`
3185 :param client:
3186 (Optional) The client to use. If not passed, falls back to the
3187 ``client`` stored on the blob's bucket.
3188
3189 :type timeout: float or tuple
3190 :param timeout:
3191 (Optional) The amount of time, in seconds, to wait
3192 for the server response. See: :ref:`configuring_timeouts`
3193
3194 :type checksum: str
3195 :param checksum:
3196 (Optional) The type of checksum to compute to verify
3197 the integrity of the object. After the upload is complete, the
3198 server-computed checksum of the resulting object will be checked
3199 and google.cloud.storage.exceptions.DataCorruption will be raised on
3200 a mismatch. On a validation failure, the client will attempt to
3201 delete the uploaded object automatically. Supported values are
3202 "md5", "crc32c", "auto" and None. The default is "auto", which will
3203 try to detect if the C extension for crc32c is installed and fall
3204 back to md5 otherwise.
3205
3206 :type predefined_acl: str
3207 :param predefined_acl: (Optional) Predefined access control list
3208
3209 :type if_generation_match: long
3210 :param if_generation_match:
3211 (Optional) See :ref:`using-if-generation-match`
3212
3213 :type if_generation_not_match: long
3214 :param if_generation_not_match:
3215 (Optional) See :ref:`using-if-generation-not-match`
3216
3217 :type if_metageneration_match: long
3218 :param if_metageneration_match:
3219 (Optional) See :ref:`using-if-metageneration-match`
3220
3221 :type if_metageneration_not_match: long
3222 :param if_metageneration_not_match:
3223 (Optional) See :ref:`using-if-metageneration-not-match`
3224
3225 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3226 :param retry: (Optional) How to retry the RPC. A None value will disable
3227 retries. A google.api_core.retry.Retry value will enable retries,
3228 and the object will define retriable response codes and errors and
3229 configure backoff and timeout options.
3230
3231 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3232 Retry object and activates it only if certain conditions are met.
3233 This class exists to provide safe defaults for RPC calls that are
3234 not technically safe to retry normally (due to potential data
3235 duplication or other side-effects) but become safe to retry if a
3236 condition such as if_generation_match is set.
3237
3238 See the retry.py source code and docstrings in this package
3239 (google.cloud.storage.retry) for information on retry types and how
3240 to configure them.
3241
3242 :rtype: str
3243 :returns: The resumable upload session URL. The upload can be
3244 completed by making an HTTP PUT request with the
3245 file's contents.
3246
3247 :raises: :class:`google.cloud.exceptions.GoogleCloudError`
3248 if the session creation response returns an error status.
3249 """
3250 with create_trace_span(name="Storage.Blob.createResumableUploadSession"):
3251 # Handle ConditionalRetryPolicy.
3252 if isinstance(retry, ConditionalRetryPolicy):
3253 # Conditional retries are designed for non-media calls, which change
3254 # arguments into query_params dictionaries. Media operations work
3255 # differently, so here we make a "fake" query_params to feed to the
3256 # ConditionalRetryPolicy.
3257 query_params = {
3258 "ifGenerationMatch": if_generation_match,
3259 "ifMetagenerationMatch": if_metageneration_match,
3260 }
3261 retry = retry.get_retry_policy_if_conditions_met(
3262 query_params=query_params
3263 )
3264
3265 extra_headers = {}
3266 if origin is not None:
3267 # This header is specifically for client-side uploads, it
3268 # determines the origins allowed for CORS.
3269 extra_headers["Origin"] = origin
3270
3271 try:
3272 fake_stream = BytesIO(b"")
3273 # Send a fake the chunk size which we **know** will be acceptable
3274 # to the `ResumableUpload` constructor. The chunk size only
3275 # matters when **sending** bytes to an upload.
3276 upload, _ = self._initiate_resumable_upload(
3277 client,
3278 fake_stream,
3279 content_type,
3280 size,
3281 predefined_acl=predefined_acl,
3282 if_generation_match=if_generation_match,
3283 if_generation_not_match=if_generation_not_match,
3284 if_metageneration_match=if_metageneration_match,
3285 if_metageneration_not_match=if_metageneration_not_match,
3286 extra_headers=extra_headers,
3287 chunk_size=self._CHUNK_SIZE_MULTIPLE,
3288 timeout=timeout,
3289 checksum=checksum,
3290 retry=retry,
3291 )
3292
3293 return upload.resumable_url
3294 except InvalidResponse as exc:
3295 _raise_from_invalid_response(exc)
3296
3297 def get_iam_policy(
3298 self,
3299 client=None,
3300 requested_policy_version=None,
3301 timeout=_DEFAULT_TIMEOUT,
3302 retry=DEFAULT_RETRY,
3303 ):
3304 """Retrieve the IAM policy for the object.
3305
3306 .. note::
3307
3308 Blob- / object-level IAM support does not yet exist and methods
3309 currently call an internal ACL backend not providing any utility
3310 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3311 in the future and is currently undocumented. Use :attr:`acl` for
3312 managing object access control.
3313
3314 If :attr:`user_project` is set on the bucket, bills the API request
3315 to that project.
3316
3317 :type client: :class:`~google.cloud.storage.client.Client`
3318 :param client:
3319 (Optional) The client to use. If not passed, falls back to the
3320 ``client`` stored on the current object's bucket.
3321
3322 :type requested_policy_version: int or ``NoneType``
3323 :param requested_policy_version:
3324 (Optional) The version of IAM policies to request. If a policy
3325 with a condition is requested without setting this, the server will
3326 return an error. This must be set to a value of 3 to retrieve IAM
3327 policies containing conditions. This is to prevent client code that
3328 isn't aware of IAM conditions from interpreting and modifying
3329 policies incorrectly. The service might return a policy with
3330 version lower than the one that was requested, based on the feature
3331 syntax in the policy fetched.
3332
3333 :type timeout: float or tuple
3334 :param timeout:
3335 (Optional) The amount of time, in seconds, to wait
3336 for the server response. See: :ref:`configuring_timeouts`
3337
3338 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3339 :param retry:
3340 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3341
3342 :rtype: :class:`google.api_core.iam.Policy`
3343 :returns: the policy instance, based on the resource returned from
3344 the ``getIamPolicy`` API request.
3345 """
3346 with create_trace_span(name="Storage.Blob.getIamPolicy"):
3347 client = self._require_client(client)
3348
3349 query_params = {}
3350
3351 if self.user_project is not None:
3352 query_params["userProject"] = self.user_project
3353
3354 if requested_policy_version is not None:
3355 query_params["optionsRequestedPolicyVersion"] = requested_policy_version
3356
3357 info = client._get_resource(
3358 f"{self.path}/iam",
3359 query_params=query_params,
3360 timeout=timeout,
3361 retry=retry,
3362 _target_object=None,
3363 )
3364 return Policy.from_api_repr(info)
3365
3366 def set_iam_policy(
3367 self,
3368 policy,
3369 client=None,
3370 timeout=_DEFAULT_TIMEOUT,
3371 retry=DEFAULT_RETRY_IF_ETAG_IN_JSON,
3372 ):
3373 """Update the IAM policy for the bucket.
3374
3375 .. note::
3376
3377 Blob- / object-level IAM support does not yet exist and methods
3378 currently call an internal ACL backend not providing any utility
3379 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3380 in the future and is currently undocumented. Use :attr:`acl` for
3381 managing object access control.
3382
3383 If :attr:`user_project` is set on the bucket, bills the API request
3384 to that project.
3385
3386 :type policy: :class:`google.api_core.iam.Policy`
3387 :param policy: policy instance used to update bucket's IAM policy.
3388
3389 :type client: :class:`~google.cloud.storage.client.Client`
3390 :param client:
3391 (Optional) The client to use. If not passed, falls back to the
3392 ``client`` stored on the current bucket.
3393
3394 :type timeout: float or tuple
3395 :param timeout:
3396 (Optional) The amount of time, in seconds, to wait
3397 for the server response. See: :ref:`configuring_timeouts`
3398
3399 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3400 :param retry:
3401 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3402
3403 :rtype: :class:`google.api_core.iam.Policy`
3404 :returns: the policy instance, based on the resource returned from
3405 the ``setIamPolicy`` API request.
3406 """
3407 with create_trace_span(name="Storage.Blob.setIamPolicy"):
3408 client = self._require_client(client)
3409
3410 query_params = {}
3411
3412 if self.user_project is not None:
3413 query_params["userProject"] = self.user_project
3414
3415 path = f"{self.path}/iam"
3416 resource = policy.to_api_repr()
3417 resource["resourceId"] = self.path
3418 info = client._put_resource(
3419 path,
3420 resource,
3421 query_params=query_params,
3422 timeout=timeout,
3423 retry=retry,
3424 _target_object=None,
3425 )
3426 return Policy.from_api_repr(info)
3427
3428 def test_iam_permissions(
3429 self, permissions, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY
3430 ):
3431 """API call: test permissions
3432
3433 .. note::
3434
3435 Blob- / object-level IAM support does not yet exist and methods
3436 currently call an internal ACL backend not providing any utility
3437 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3438 in the future and is currently undocumented. Use :attr:`acl` for
3439 managing object access control.
3440
3441 If :attr:`user_project` is set on the bucket, bills the API request
3442 to that project.
3443
3444 :type permissions: list of string
3445 :param permissions: the permissions to check
3446
3447 :type client: :class:`~google.cloud.storage.client.Client`
3448 :param client:
3449 (Optional) The client to use. If not passed, falls back to the
3450 ``client`` stored on the current bucket.
3451
3452 :type timeout: float or tuple
3453 :param timeout:
3454 (Optional) The amount of time, in seconds, to wait
3455 for the server response. See: :ref:`configuring_timeouts`
3456
3457 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3458 :param retry:
3459 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3460
3461 :rtype: list of string
3462 :returns: the permissions returned by the ``testIamPermissions`` API
3463 request.
3464 """
3465 with create_trace_span(name="Storage.Blob.testIamPermissions"):
3466 client = self._require_client(client)
3467 query_params = {"permissions": permissions}
3468
3469 if self.user_project is not None:
3470 query_params["userProject"] = self.user_project
3471
3472 path = f"{self.path}/iam/testPermissions"
3473 resp = client._get_resource(
3474 path,
3475 query_params=query_params,
3476 timeout=timeout,
3477 retry=retry,
3478 _target_object=None,
3479 )
3480
3481 return resp.get("permissions", [])
3482
3483 def make_public(
3484 self,
3485 client=None,
3486 timeout=_DEFAULT_TIMEOUT,
3487 if_generation_match=None,
3488 if_generation_not_match=None,
3489 if_metageneration_match=None,
3490 if_metageneration_not_match=None,
3491 retry=DEFAULT_RETRY,
3492 ):
3493 """Update blob's ACL, granting read access to anonymous users.
3494
3495 :type client: :class:`~google.cloud.storage.client.Client` or
3496 ``NoneType``
3497 :param client: (Optional) The client to use. If not passed, falls back
3498 to the ``client`` stored on the blob's bucket.
3499
3500 :type timeout: float or tuple
3501 :param timeout:
3502 (Optional) The amount of time, in seconds, to wait
3503 for the server response. See: :ref:`configuring_timeouts`
3504
3505 :type if_generation_match: long
3506 :param if_generation_match:
3507 (Optional) See :ref:`using-if-generation-match`
3508
3509 :type if_generation_not_match: long
3510 :param if_generation_not_match:
3511 (Optional) See :ref:`using-if-generation-not-match`
3512
3513 :type if_metageneration_match: long
3514 :param if_metageneration_match:
3515 (Optional) See :ref:`using-if-metageneration-match`
3516
3517 :type if_metageneration_not_match: long
3518 :param if_metageneration_not_match:
3519 (Optional) See :ref:`using-if-metageneration-not-match`
3520
3521 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3522 :param retry:
3523 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3524 """
3525 with create_trace_span(name="Storage.Blob.makePublic"):
3526 self.acl.all().grant_read()
3527 self.acl.save(
3528 client=client,
3529 timeout=timeout,
3530 if_generation_match=if_generation_match,
3531 if_generation_not_match=if_generation_not_match,
3532 if_metageneration_match=if_metageneration_match,
3533 if_metageneration_not_match=if_metageneration_not_match,
3534 retry=retry,
3535 )
3536
3537 def make_private(
3538 self,
3539 client=None,
3540 timeout=_DEFAULT_TIMEOUT,
3541 if_generation_match=None,
3542 if_generation_not_match=None,
3543 if_metageneration_match=None,
3544 if_metageneration_not_match=None,
3545 retry=DEFAULT_RETRY,
3546 ):
3547 """Update blob's ACL, revoking read access for anonymous users.
3548
3549 :type client: :class:`~google.cloud.storage.client.Client` or
3550 ``NoneType``
3551 :param client: (Optional) The client to use. If not passed, falls back
3552 to the ``client`` stored on the blob's bucket.
3553
3554 :type timeout: float or tuple
3555 :param timeout:
3556 (Optional) The amount of time, in seconds, to wait
3557 for the server response. See: :ref:`configuring_timeouts`
3558
3559 :type if_generation_match: long
3560 :param if_generation_match:
3561 (Optional) See :ref:`using-if-generation-match`
3562
3563 :type if_generation_not_match: long
3564 :param if_generation_not_match:
3565 (Optional) See :ref:`using-if-generation-not-match`
3566
3567 :type if_metageneration_match: long
3568 :param if_metageneration_match:
3569 (Optional) See :ref:`using-if-metageneration-match`
3570
3571 :type if_metageneration_not_match: long
3572 :param if_metageneration_not_match:
3573 (Optional) See :ref:`using-if-metageneration-not-match`
3574
3575 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3576 :param retry:
3577 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3578 """
3579 with create_trace_span(name="Storage.Blob.makePrivate"):
3580 self.acl.all().revoke_read()
3581 self.acl.save(
3582 client=client,
3583 timeout=timeout,
3584 if_generation_match=if_generation_match,
3585 if_generation_not_match=if_generation_not_match,
3586 if_metageneration_match=if_metageneration_match,
3587 if_metageneration_not_match=if_metageneration_not_match,
3588 retry=retry,
3589 )
3590
3591 def compose(
3592 self,
3593 sources,
3594 client=None,
3595 timeout=_DEFAULT_TIMEOUT,
3596 if_generation_match=None,
3597 if_metageneration_match=None,
3598 if_source_generation_match=None,
3599 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3600 ):
3601 """Concatenate source blobs into this one.
3602
3603 If :attr:`user_project` is set on the bucket, bills the API request
3604 to that project.
3605
3606 See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/objects/compose)
3607 and a [code sample](https://cloud.google.com/storage/docs/samples/storage-compose-file#storage_compose_file-python).
3608
3609 :type sources: list of :class:`Blob`
3610 :param sources: Blobs whose contents will be composed into this blob.
3611
3612 :type client: :class:`~google.cloud.storage.client.Client`
3613 :param client:
3614 (Optional) The client to use. If not passed, falls back to the
3615 ``client`` stored on the blob's bucket.
3616
3617 :type timeout: float or tuple
3618 :param timeout:
3619 (Optional) The amount of time, in seconds, to wait
3620 for the server response. See: :ref:`configuring_timeouts`
3621
3622 :type if_generation_match: long
3623 :param if_generation_match:
3624 (Optional) Makes the operation conditional on whether the
3625 destination object's current generation matches the given value.
3626 Setting to 0 makes the operation succeed only if there are no live
3627 versions of the object.
3628 Note: In a previous version, this argument worked identically to the
3629 ``if_source_generation_match`` argument. For
3630 backwards-compatibility reasons, if a list is passed in,
3631 this argument will behave like ``if_source_generation_match``
3632 and also issue a DeprecationWarning.
3633
3634 :type if_metageneration_match: long
3635 :param if_metageneration_match:
3636 (Optional) Makes the operation conditional on whether the
3637 destination object's current metageneration matches the given
3638 value.
3639
3640 If a list of long is passed in, no match operation will be
3641 performed. (Deprecated: type(list of long) is supported for
3642 backwards-compatability reasons only.)
3643
3644 :type if_source_generation_match: list of long
3645 :param if_source_generation_match:
3646 (Optional) Makes the operation conditional on whether the current
3647 generation of each source blob matches the corresponding generation.
3648 The list must match ``sources`` item-to-item.
3649
3650 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3651 :param retry:
3652 (Optional) How to retry the RPC.
3653 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
3654 policy which will only enable retries if ``if_generation_match`` or ``generation``
3655 is set, in order to ensure requests are idempotent before retrying them.
3656 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
3657 to enable retries regardless of generation precondition setting.
3658 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
3659 """
3660 with create_trace_span(name="Storage.Blob.compose"):
3661 sources_len = len(sources)
3662 client = self._require_client(client)
3663 query_params = {}
3664
3665 if isinstance(if_generation_match, list):
3666 warnings.warn(
3667 _COMPOSE_IF_GENERATION_LIST_DEPRECATED,
3668 DeprecationWarning,
3669 stacklevel=2,
3670 )
3671
3672 if if_source_generation_match is not None:
3673 raise ValueError(
3674 _COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR
3675 )
3676
3677 if_source_generation_match = if_generation_match
3678 if_generation_match = None
3679
3680 if isinstance(if_metageneration_match, list):
3681 warnings.warn(
3682 _COMPOSE_IF_METAGENERATION_LIST_DEPRECATED,
3683 DeprecationWarning,
3684 stacklevel=2,
3685 )
3686
3687 if_metageneration_match = None
3688
3689 if if_source_generation_match is None:
3690 if_source_generation_match = [None] * sources_len
3691 if len(if_source_generation_match) != sources_len:
3692 raise ValueError(_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR)
3693
3694 source_objects = []
3695 for source, source_generation in zip(sources, if_source_generation_match):
3696 source_object = {"name": source.name, "generation": source.generation}
3697
3698 preconditions = {}
3699 if source_generation is not None:
3700 preconditions["ifGenerationMatch"] = source_generation
3701
3702 if preconditions:
3703 source_object["objectPreconditions"] = preconditions
3704
3705 source_objects.append(source_object)
3706
3707 request = {
3708 "sourceObjects": source_objects,
3709 "destination": self._properties.copy(),
3710 }
3711
3712 if self.user_project is not None:
3713 query_params["userProject"] = self.user_project
3714
3715 _add_generation_match_parameters(
3716 query_params,
3717 if_generation_match=if_generation_match,
3718 if_metageneration_match=if_metageneration_match,
3719 )
3720
3721 api_response = client._post_resource(
3722 f"{self.path}/compose",
3723 request,
3724 query_params=query_params,
3725 timeout=timeout,
3726 retry=retry,
3727 _target_object=self,
3728 )
3729 self._set_properties(api_response)
3730
3731 def rewrite(
3732 self,
3733 source,
3734 token=None,
3735 client=None,
3736 if_generation_match=None,
3737 if_generation_not_match=None,
3738 if_metageneration_match=None,
3739 if_metageneration_not_match=None,
3740 if_source_generation_match=None,
3741 if_source_generation_not_match=None,
3742 if_source_metageneration_match=None,
3743 if_source_metageneration_not_match=None,
3744 timeout=_DEFAULT_TIMEOUT,
3745 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3746 ):
3747 """Rewrite source blob into this one.
3748
3749 If :attr:`user_project` is set on the bucket, bills the API request
3750 to that project.
3751
3752 .. note::
3753
3754 ``rewrite`` is not supported in a ``Batch`` context.
3755
3756 :type source: :class:`Blob`
3757 :param source: blob whose contents will be rewritten into this blob.
3758
3759 :type token: str
3760 :param token:
3761 (Optional) Token returned from an earlier, not-completed call to
3762 rewrite the same source blob. If passed, result will include
3763 updated status, total bytes written.
3764
3765 :type client: :class:`~google.cloud.storage.client.Client`
3766 :param client:
3767 (Optional) The client to use. If not passed, falls back to the
3768 ``client`` stored on the blob's bucket.
3769
3770 :type if_generation_match: long
3771 :param if_generation_match:
3772 (Optional) See :ref:`using-if-generation-match`
3773 Note that the generation to be matched is that of the
3774 ``destination`` blob.
3775
3776 :type if_generation_not_match: long
3777 :param if_generation_not_match:
3778 (Optional) See :ref:`using-if-generation-not-match`
3779 Note that the generation to be matched is that of the
3780 ``destination`` blob.
3781
3782 :type if_metageneration_match: long
3783 :param if_metageneration_match:
3784 (Optional) See :ref:`using-if-metageneration-match`
3785 Note that the metageneration to be matched is that of the
3786 ``destination`` blob.
3787
3788 :type if_metageneration_not_match: long
3789 :param if_metageneration_not_match:
3790 (Optional) See :ref:`using-if-metageneration-not-match`
3791 Note that the metageneration to be matched is that of the
3792 ``destination`` blob.
3793
3794 :type if_source_generation_match: long
3795 :param if_source_generation_match:
3796 (Optional) Makes the operation conditional on whether the source
3797 object's generation matches the given value.
3798
3799 :type if_source_generation_not_match: long
3800 :param if_source_generation_not_match:
3801 (Optional) Makes the operation conditional on whether the source
3802 object's generation does not match the given value.
3803
3804 :type if_source_metageneration_match: long
3805 :param if_source_metageneration_match:
3806 (Optional) Makes the operation conditional on whether the source
3807 object's current metageneration matches the given value.
3808
3809 :type if_source_metageneration_not_match: long
3810 :param if_source_metageneration_not_match:
3811 (Optional) Makes the operation conditional on whether the source
3812 object's current metageneration does not match the given value.
3813
3814 :type timeout: float or tuple
3815 :param timeout:
3816 (Optional) The amount of time, in seconds, to wait
3817 for the server response. See: :ref:`configuring_timeouts`
3818
3819 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3820 :param retry:
3821 (Optional) How to retry the RPC.
3822 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
3823 policy which will only enable retries if ``if_generation_match`` or ``generation``
3824 is set, in order to ensure requests are idempotent before retrying them.
3825 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
3826 to enable retries regardless of generation precondition setting.
3827 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
3828
3829 :rtype: tuple
3830 :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
3831 is a rewrite token (``None`` if the rewrite is complete),
3832 ``bytes_rewritten`` is the number of bytes rewritten so far,
3833 and ``total_bytes`` is the total number of bytes to be
3834 rewritten.
3835 """
3836 with create_trace_span(name="Storage.Blob.rewrite"):
3837 client = self._require_client(client)
3838 headers = _get_encryption_headers(self._encryption_key)
3839 headers.update(_get_encryption_headers(source._encryption_key, source=True))
3840
3841 query_params = self._query_params
3842 if "generation" in query_params:
3843 del query_params["generation"]
3844
3845 if token:
3846 query_params["rewriteToken"] = token
3847
3848 if source.generation:
3849 query_params["sourceGeneration"] = source.generation
3850
3851 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
3852 # at rest, object resource metadata will store the version of the Key Management
3853 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
3854 # used to rewrite the object, then the existing kmsKeyName version
3855 # value can't be used in the rewrite request and the client instead ignores it.
3856 if (
3857 self.kms_key_name is not None
3858 and "cryptoKeyVersions" not in self.kms_key_name
3859 ):
3860 query_params["destinationKmsKeyName"] = self.kms_key_name
3861
3862 _add_generation_match_parameters(
3863 query_params,
3864 if_generation_match=if_generation_match,
3865 if_generation_not_match=if_generation_not_match,
3866 if_metageneration_match=if_metageneration_match,
3867 if_metageneration_not_match=if_metageneration_not_match,
3868 if_source_generation_match=if_source_generation_match,
3869 if_source_generation_not_match=if_source_generation_not_match,
3870 if_source_metageneration_match=if_source_metageneration_match,
3871 if_source_metageneration_not_match=if_source_metageneration_not_match,
3872 )
3873
3874 path = f"{source.path}/rewriteTo{self.path}"
3875 api_response = client._post_resource(
3876 path,
3877 self._properties,
3878 query_params=query_params,
3879 headers=headers,
3880 timeout=timeout,
3881 retry=retry,
3882 _target_object=self,
3883 )
3884 rewritten = int(api_response["totalBytesRewritten"])
3885 size = int(api_response["objectSize"])
3886
3887 # The resource key is set if and only if the API response is
3888 # completely done. Additionally, there is no rewrite token to return
3889 # in this case.
3890 if api_response["done"]:
3891 self._set_properties(api_response["resource"])
3892 return None, rewritten, size
3893
3894 return api_response["rewriteToken"], rewritten, size
3895
3896 def update_storage_class(
3897 self,
3898 new_class,
3899 client=None,
3900 if_generation_match=None,
3901 if_generation_not_match=None,
3902 if_metageneration_match=None,
3903 if_metageneration_not_match=None,
3904 if_source_generation_match=None,
3905 if_source_generation_not_match=None,
3906 if_source_metageneration_match=None,
3907 if_source_metageneration_not_match=None,
3908 timeout=_DEFAULT_TIMEOUT,
3909 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3910 ):
3911 """Update blob's storage class via a rewrite-in-place. This helper will
3912 wait for the rewrite to complete before returning, so it may take some
3913 time for large files.
3914
3915 See
3916 https://cloud.google.com/storage/docs/per-object-storage-class
3917
3918 If :attr:`user_project` is set on the bucket, bills the API request
3919 to that project.
3920
3921 :type new_class: str
3922 :param new_class:
3923 new storage class for the object. One of:
3924 :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
3925 :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
3926 :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
3927 :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
3928 :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
3929 or
3930 :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`.
3931
3932 :type client: :class:`~google.cloud.storage.client.Client`
3933 :param client:
3934 (Optional) The client to use. If not passed, falls back to the
3935 ``client`` stored on the blob's bucket.
3936
3937 :type if_generation_match: long
3938 :param if_generation_match:
3939 (Optional) See :ref:`using-if-generation-match`
3940 Note that the generation to be matched is that of the
3941 ``destination`` blob.
3942
3943 :type if_generation_not_match: long
3944 :param if_generation_not_match:
3945 (Optional) See :ref:`using-if-generation-not-match`
3946 Note that the generation to be matched is that of the
3947 ``destination`` blob.
3948
3949 :type if_metageneration_match: long
3950 :param if_metageneration_match:
3951 (Optional) See :ref:`using-if-metageneration-match`
3952 Note that the metageneration to be matched is that of the
3953 ``destination`` blob.
3954
3955 :type if_metageneration_not_match: long
3956 :param if_metageneration_not_match:
3957 (Optional) See :ref:`using-if-metageneration-not-match`
3958 Note that the metageneration to be matched is that of the
3959 ``destination`` blob.
3960
3961 :type if_source_generation_match: long
3962 :param if_source_generation_match:
3963 (Optional) Makes the operation conditional on whether the source
3964 object's generation matches the given value.
3965
3966 :type if_source_generation_not_match: long
3967 :param if_source_generation_not_match:
3968 (Optional) Makes the operation conditional on whether the source
3969 object's generation does not match the given value.
3970
3971 :type if_source_metageneration_match: long
3972 :param if_source_metageneration_match:
3973 (Optional) Makes the operation conditional on whether the source
3974 object's current metageneration matches the given value.
3975
3976 :type if_source_metageneration_not_match: long
3977 :param if_source_metageneration_not_match:
3978 (Optional) Makes the operation conditional on whether the source
3979 object's current metageneration does not match the given value.
3980
3981 :type timeout: float or tuple
3982 :param timeout:
3983 (Optional) The amount of time, in seconds, to wait
3984 for the server response. See: :ref:`configuring_timeouts`
3985
3986 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3987 :param retry:
3988 (Optional) How to retry the RPC.
3989 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
3990 policy which will only enable retries if ``if_generation_match`` or ``generation``
3991 is set, in order to ensure requests are idempotent before retrying them.
3992 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
3993 to enable retries regardless of generation precondition setting.
3994 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
3995 """
3996 with create_trace_span(name="Storage.Blob.updateStorageClass"):
3997 # Update current blob's storage class prior to rewrite
3998 self._patch_property("storageClass", new_class)
3999
4000 # Execute consecutive rewrite operations until operation is done
4001 token, _, _ = self.rewrite(
4002 self,
4003 if_generation_match=if_generation_match,
4004 if_generation_not_match=if_generation_not_match,
4005 if_metageneration_match=if_metageneration_match,
4006 if_metageneration_not_match=if_metageneration_not_match,
4007 if_source_generation_match=if_source_generation_match,
4008 if_source_generation_not_match=if_source_generation_not_match,
4009 if_source_metageneration_match=if_source_metageneration_match,
4010 if_source_metageneration_not_match=if_source_metageneration_not_match,
4011 timeout=timeout,
4012 retry=retry,
4013 )
4014 while token is not None:
4015 token, _, _ = self.rewrite(
4016 self,
4017 token=token,
4018 if_generation_match=if_generation_match,
4019 if_generation_not_match=if_generation_not_match,
4020 if_metageneration_match=if_metageneration_match,
4021 if_metageneration_not_match=if_metageneration_not_match,
4022 if_source_generation_match=if_source_generation_match,
4023 if_source_generation_not_match=if_source_generation_not_match,
4024 if_source_metageneration_match=if_source_metageneration_match,
4025 if_source_metageneration_not_match=if_source_metageneration_not_match,
4026 timeout=timeout,
4027 retry=retry,
4028 )
4029
4030 def open(
4031 self,
4032 mode="r",
4033 chunk_size=None,
4034 ignore_flush=None,
4035 encoding=None,
4036 errors=None,
4037 newline=None,
4038 **kwargs,
4039 ):
4040 r"""Create a file handler for file-like I/O to or from this blob.
4041
4042 This method can be used as a context manager, just like Python's
4043 built-in 'open()' function.
4044
4045 While reading, as with other read methods, if blob.generation is not set
4046 the most recent blob generation will be used. Because the file-like IO
4047 reader downloads progressively in chunks, this could result in data from
4048 multiple versions being mixed together. If this is a concern, use
4049 either bucket.get_blob(), or blob.reload(), which will download the
4050 latest generation number and set it; or, if the generation is known, set
4051 it manually, for instance with bucket.blob(generation=123456).
4052
4053 Checksumming (hashing) to verify data integrity is disabled for reads
4054 using this feature because reads are implemented using request ranges,
4055 which do not provide checksums to validate. See
4056 https://cloud.google.com/storage/docs/hashes-etags for details.
4057
4058 See a [code sample](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_fileio_write_read.py).
4059
4060 Keyword arguments to pass to the underlying API calls.
4061 For both uploads and downloads, the following arguments are
4062 supported:
4063
4064 - ``if_generation_match``
4065 - ``if_generation_not_match``
4066 - ``if_metageneration_match``
4067 - ``if_metageneration_not_match``
4068 - ``timeout``
4069 - ``retry``
4070
4071 For downloads only, the following additional arguments are supported:
4072
4073 - ``raw_download``
4074 - ``single_shot_download``
4075
4076 For uploads only, the following additional arguments are supported:
4077
4078 - ``content_type``
4079 - ``predefined_acl``
4080 - ``checksum``
4081
4082 :type mode: str
4083 :param mode:
4084 (Optional) A mode string, as per standard Python `open()` semantics.The first
4085 character must be 'r', to open the blob for reading, or 'w' to open
4086 it for writing. The second character, if present, must be 't' for
4087 (unicode) text mode, or 'b' for bytes mode. If the second character
4088 is omitted, text mode is the default.
4089
4090 :type chunk_size: long
4091 :param chunk_size:
4092 (Optional) For reads, the minimum number of bytes to read at a time.
4093 If fewer bytes than the chunk_size are requested, the remainder is
4094 buffered. For writes, the maximum number of bytes to buffer before
4095 sending data to the server, and the size of each request when data
4096 is sent. Writes are implemented as a "resumable upload", so
4097 chunk_size for writes must be exactly a multiple of 256KiB as with
4098 other resumable uploads. The default is 40 MiB.
4099
4100 :type ignore_flush: bool
4101 :param ignore_flush:
4102 (Optional) For non text-mode writes, makes flush() do nothing
4103 instead of raising an error. flush() without closing is not
4104 supported by the remote service and therefore calling it normally
4105 results in io.UnsupportedOperation. However, that behavior is
4106 incompatible with some consumers and wrappers of file objects in
4107 Python, such as zipfile.ZipFile or io.TextIOWrapper. Setting
4108 ignore_flush will cause flush() to successfully do nothing, for
4109 compatibility with those contexts. The correct way to actually flush
4110 data to the remote server is to close() (using a context manager,
4111 such as in the example, will cause this to happen automatically).
4112
4113 :type encoding: str
4114 :param encoding:
4115 (Optional) For text mode only, the name of the encoding that the stream will
4116 be decoded or encoded with. If omitted, it defaults to
4117 locale.getpreferredencoding(False).
4118
4119 :type errors: str
4120 :param errors:
4121 (Optional) For text mode only, an optional string that specifies how encoding
4122 and decoding errors are to be handled. Pass 'strict' to raise a
4123 ValueError exception if there is an encoding error (the default of
4124 None has the same effect), or pass 'ignore' to ignore errors. (Note
4125 that ignoring encoding errors can lead to data loss.) Other more
4126 rarely-used options are also available; see the Python 'io' module
4127 documentation for 'io.TextIOWrapper' for a complete list.
4128
4129 :type newline: str
4130 :param newline:
4131 (Optional) For text mode only, controls how line endings are handled. It can
4132 be None, '', '\n', '\r', and '\r\n'. If None, reads use "universal
4133 newline mode" and writes use the system default. See the Python
4134 'io' module documentation for 'io.TextIOWrapper' for details.
4135
4136 :returns: A 'BlobReader' or 'BlobWriter' from
4137 'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one
4138 of those classes, depending on the 'mode' argument.
4139 """
4140 with create_trace_span(name="Storage.Blob.open"):
4141 if mode == "rb":
4142 if encoding or errors or newline:
4143 raise ValueError(
4144 "encoding, errors and newline arguments are for text mode only"
4145 )
4146 if ignore_flush:
4147 raise ValueError(
4148 "ignore_flush argument is for non-text write mode only"
4149 )
4150 return BlobReader(self, chunk_size=chunk_size, **kwargs)
4151 elif mode == "wb":
4152 if encoding or errors or newline:
4153 raise ValueError(
4154 "encoding, errors and newline arguments are for text mode only"
4155 )
4156 return BlobWriter(
4157 self, chunk_size=chunk_size, ignore_flush=ignore_flush, **kwargs
4158 )
4159 elif mode in ("r", "rt"):
4160 if ignore_flush:
4161 raise ValueError(
4162 "ignore_flush argument is for non-text write mode only"
4163 )
4164 return TextIOWrapper(
4165 BlobReader(self, chunk_size=chunk_size, **kwargs),
4166 encoding=encoding,
4167 errors=errors,
4168 newline=newline,
4169 )
4170 elif mode in ("w", "wt"):
4171 if ignore_flush is False:
4172 raise ValueError(
4173 "ignore_flush is required for text mode writing and "
4174 "cannot be set to False"
4175 )
4176 return TextIOWrapper(
4177 BlobWriter(
4178 self, chunk_size=chunk_size, ignore_flush=True, **kwargs
4179 ),
4180 encoding=encoding,
4181 errors=errors,
4182 newline=newline,
4183 )
4184 else:
4185 raise NotImplementedError(
4186 "Supported modes strings are 'r', 'rb', 'rt', 'w', 'wb', and 'wt' only."
4187 )
4188
4189 cache_control = _scalar_property("cacheControl")
4190 """HTTP 'Cache-Control' header for this object.
4191
4192 See [`RFC 7234`](https://tools.ietf.org/html/rfc7234#section-5.2)
4193 and [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4194
4195 :rtype: str or ``NoneType``
4196
4197 """
4198
4199 content_disposition = _scalar_property("contentDisposition")
4200 """HTTP 'Content-Disposition' header for this object.
4201
4202 See [`RFC 6266`](https://tools.ietf.org/html/rfc7234#section-5.2) and
4203 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4204
4205 :rtype: str or ``NoneType``
4206 """
4207
4208 content_encoding = _scalar_property("contentEncoding")
4209 """HTTP 'Content-Encoding' header for this object.
4210
4211 See [`RFC 7231`](https://tools.ietf.org/html/rfc7231#section-3.1.2.2) and
4212 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4213
4214 :rtype: str or ``NoneType``
4215 """
4216
4217 content_language = _scalar_property("contentLanguage")
4218 """HTTP 'Content-Language' header for this object.
4219
4220 See [`BCP47`](https://tools.ietf.org/html/bcp47) and
4221 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4222
4223 :rtype: str or ``NoneType``
4224 """
4225
4226 content_type = _scalar_property(_CONTENT_TYPE_FIELD)
4227 """HTTP 'Content-Type' header for this object.
4228
4229 See [`RFC 2616`](https://tools.ietf.org/html/rfc2616#section-14.17) and
4230 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4231
4232 :rtype: str or ``NoneType``
4233 """
4234
4235 crc32c = _scalar_property("crc32c")
4236 """CRC32C checksum for this object.
4237
4238 This returns the blob's CRC32C checksum. To retrieve the value, first use a
4239 reload method of the Blob class which loads the blob's properties from the server.
4240
4241 See [`RFC 4960`](https://tools.ietf.org/html/rfc4960#appendix-B) and
4242 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4243
4244 If not set before upload, the server will compute the hash.
4245
4246 :rtype: str or ``NoneType``
4247 """
4248
4249 def _prep_and_do_download(
4250 self,
4251 file_obj,
4252 client=None,
4253 start=None,
4254 end=None,
4255 raw_download=False,
4256 if_etag_match=None,
4257 if_etag_not_match=None,
4258 if_generation_match=None,
4259 if_generation_not_match=None,
4260 if_metageneration_match=None,
4261 if_metageneration_not_match=None,
4262 timeout=_DEFAULT_TIMEOUT,
4263 checksum="auto",
4264 retry=DEFAULT_RETRY,
4265 single_shot_download=False,
4266 command=None,
4267 ):
4268 """Download the contents of a blob object into a file-like object.
4269
4270 See https://cloud.google.com/storage/docs/downloading-objects
4271
4272 If :attr:`user_project` is set on the bucket, bills the API request
4273 to that project.
4274
4275 :type file_obj: file
4276 :param file_obj: A file handle to which to write the blob's data.
4277
4278 :type client: :class:`~google.cloud.storage.client.Client`
4279 :param client:
4280 (Optional) The client to use. If not passed, falls back to the
4281 ``client`` stored on the blob's bucket.
4282
4283 :type start: int
4284 :param start: (Optional) The first byte in a range to be downloaded.
4285
4286 :type end: int
4287 :param end: (Optional) The last byte in a range to be downloaded.
4288
4289 :type raw_download: bool
4290 :param raw_download:
4291 (Optional) If true, download the object without any expansion.
4292
4293 :type if_etag_match: Union[str, Set[str]]
4294 :param if_etag_match:
4295 (Optional) See :ref:`using-if-etag-match`
4296
4297 :type if_etag_not_match: Union[str, Set[str]]
4298 :param if_etag_not_match:
4299 (Optional) See :ref:`using-if-etag-not-match`
4300
4301 :type if_generation_match: long
4302 :param if_generation_match:
4303 (Optional) See :ref:`using-if-generation-match`
4304
4305 :type if_generation_not_match: long
4306 :param if_generation_not_match:
4307 (Optional) See :ref:`using-if-generation-not-match`
4308
4309 :type if_metageneration_match: long
4310 :param if_metageneration_match:
4311 (Optional) See :ref:`using-if-metageneration-match`
4312
4313 :type if_metageneration_not_match: long
4314 :param if_metageneration_not_match:
4315 (Optional) See :ref:`using-if-metageneration-not-match`
4316
4317 :type timeout: float or tuple
4318 :param timeout:
4319 (Optional) The amount of time, in seconds, to wait
4320 for the server response. See: :ref:`configuring_timeouts`
4321
4322 :type checksum: str
4323 :param checksum:
4324 (Optional) The type of checksum to compute to verify the integrity
4325 of the object. The response headers must contain a checksum of the
4326 requested type. If the headers lack an appropriate checksum (for
4327 instance in the case of transcoded or ranged downloads where the
4328 remote service does not know the correct checksum, including
4329 downloads where chunk_size is set) an INFO-level log will be
4330 emitted. Supported values are "md5", "crc32c", "auto" and None. The
4331 default is "auto", which will try to detect if the C extension for
4332 crc32c is installed and fall back to md5 otherwise.
4333
4334 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4335 :param retry: (Optional) How to retry the RPC. A None value will disable
4336 retries. A google.api_core.retry.Retry value will enable retries,
4337 and the object will define retriable response codes and errors and
4338 configure backoff and timeout options.
4339
4340 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
4341 Retry object and activates it only if certain conditions are met.
4342 This class exists to provide safe defaults for RPC calls that are
4343 not technically safe to retry normally (due to potential data
4344 duplication or other side-effects) but become safe to retry if a
4345 condition such as if_metageneration_match is set.
4346
4347 See the retry.py source code and docstrings in this package
4348 (google.cloud.storage.retry) for information on retry types and how
4349 to configure them.
4350
4351 :type single_shot_download: bool
4352 :param single_shot_download:
4353 (Optional) If true, download the object in a single request.
4354 Caution: Enabling this will increase the memory overload for your application.
4355 Please enable this as per your use case.
4356
4357 :type command: str
4358 :param command:
4359 (Optional) Information about which interface for download was used,
4360 to be included in the X-Goog-API-Client header. Please leave as None
4361 unless otherwise directed.
4362 """
4363 # Handle ConditionalRetryPolicy.
4364 if isinstance(retry, ConditionalRetryPolicy):
4365 # Conditional retries are designed for non-media calls, which change
4366 # arguments into query_params dictionaries. Media operations work
4367 # differently, so here we make a "fake" query_params to feed to the
4368 # ConditionalRetryPolicy.
4369 query_params = {
4370 "ifGenerationMatch": if_generation_match,
4371 "ifMetagenerationMatch": if_metageneration_match,
4372 }
4373 retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
4374
4375 client = self._require_client(client)
4376
4377 download_url = self._get_download_url(
4378 client,
4379 if_generation_match=if_generation_match,
4380 if_generation_not_match=if_generation_not_match,
4381 if_metageneration_match=if_metageneration_match,
4382 if_metageneration_not_match=if_metageneration_not_match,
4383 )
4384 headers = _get_encryption_headers(self._encryption_key)
4385 headers["accept-encoding"] = "gzip"
4386 _add_etag_match_headers(
4387 headers,
4388 if_etag_match=if_etag_match,
4389 if_etag_not_match=if_etag_not_match,
4390 )
4391 # Add any client attached custom headers to be sent with the request.
4392 headers = {
4393 **_get_default_headers(client._connection.user_agent, command=command),
4394 **headers,
4395 **client._extra_headers,
4396 }
4397
4398 transport = client._http
4399
4400 try:
4401 self._do_download(
4402 transport,
4403 file_obj,
4404 download_url,
4405 headers,
4406 start,
4407 end,
4408 raw_download,
4409 timeout=timeout,
4410 checksum=checksum,
4411 retry=retry,
4412 single_shot_download=single_shot_download,
4413 )
4414 except InvalidResponse as exc:
4415 _raise_from_invalid_response(exc)
4416
4417 @property
4418 def component_count(self):
4419 """Number of underlying components that make up this object.
4420
4421 See https://cloud.google.com/storage/docs/json_api/v1/objects
4422
4423 :rtype: int or ``NoneType``
4424 :returns: The component count (in case of a composed object) or
4425 ``None`` if the blob's resource has not been loaded from
4426 the server. This property will not be set on objects
4427 not created via ``compose``.
4428 """
4429 component_count = self._properties.get("componentCount")
4430 if component_count is not None:
4431 return int(component_count)
4432
4433 @property
4434 def etag(self):
4435 """Retrieve the ETag for the object.
4436
4437 See [`RFC 2616 (etags)`](https://tools.ietf.org/html/rfc2616#section-3.11) and
4438 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4439
4440 :rtype: str or ``NoneType``
4441 :returns: The blob etag or ``None`` if the blob's resource has not
4442 been loaded from the server.
4443 """
4444 return self._properties.get("etag")
4445
4446 event_based_hold = _scalar_property("eventBasedHold")
4447 """Is an event-based hold active on the object?
4448
4449 See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4450
4451 If the property is not set locally, returns :data:`None`.
4452
4453 :rtype: bool or ``NoneType``
4454 """
4455
4456 @property
4457 def generation(self):
4458 """Retrieve the generation for the object.
4459
4460 See https://cloud.google.com/storage/docs/json_api/v1/objects
4461
4462 :rtype: int or ``NoneType``
4463 :returns: The generation of the blob or ``None`` if the blob's
4464 resource has not been loaded from the server.
4465 """
4466 generation = self._properties.get("generation")
4467 if generation is not None:
4468 return int(generation)
4469
4470 @property
4471 def id(self):
4472 """Retrieve the ID for the object.
4473
4474 See https://cloud.google.com/storage/docs/json_api/v1/objects
4475
4476 The ID consists of the bucket name, object name, and generation number.
4477
4478 :rtype: str or ``NoneType``
4479 :returns: The ID of the blob or ``None`` if the blob's
4480 resource has not been loaded from the server.
4481 """
4482 return self._properties.get("id")
4483
4484 md5_hash = _scalar_property("md5Hash")
4485 """MD5 hash for this object.
4486
4487 This returns the blob's MD5 hash. To retrieve the value, first use a
4488 reload method of the Blob class which loads the blob's properties from the server.
4489
4490 See [`RFC 1321`](https://tools.ietf.org/html/rfc1321) and
4491 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4492
4493 If not set before upload, the server will compute the hash.
4494
4495 :rtype: str or ``NoneType``
4496 """
4497
4498 @property
4499 def media_link(self):
4500 """Retrieve the media download URI for the object.
4501
4502 See https://cloud.google.com/storage/docs/json_api/v1/objects
4503
4504 :rtype: str or ``NoneType``
4505 :returns: The media link for the blob or ``None`` if the blob's
4506 resource has not been loaded from the server.
4507 """
4508 return self._properties.get("mediaLink")
4509
4510 @property
4511 def metadata(self):
4512 """Retrieve arbitrary/application specific metadata for the object.
4513
4514 See https://cloud.google.com/storage/docs/json_api/v1/objects
4515
4516 :setter: Update arbitrary/application specific metadata for the
4517 object.
4518 :getter: Retrieve arbitrary/application specific metadata for
4519 the object.
4520
4521 :rtype: dict or ``NoneType``
4522 :returns: The metadata associated with the blob or ``None`` if the
4523 property is not set.
4524 """
4525 return copy.deepcopy(self._properties.get("metadata"))
4526
4527 @metadata.setter
4528 def metadata(self, value):
4529 """Update arbitrary/application specific metadata for the object.
4530
4531 Values are stored to GCS as strings. To delete a key, set its value to
4532 None and call blob.patch().
4533
4534 See https://cloud.google.com/storage/docs/json_api/v1/objects
4535
4536 :type value: dict
4537 :param value: The blob metadata to set.
4538 """
4539 if value is not None:
4540 value = {k: str(v) if v is not None else None for k, v in value.items()}
4541 self._patch_property("metadata", value)
4542
4543 @property
4544 def metageneration(self):
4545 """Retrieve the metageneration for the object.
4546
4547 See https://cloud.google.com/storage/docs/json_api/v1/objects
4548
4549 :rtype: int or ``NoneType``
4550 :returns: The metageneration of the blob or ``None`` if the blob's
4551 resource has not been loaded from the server.
4552 """
4553 metageneration = self._properties.get("metageneration")
4554 if metageneration is not None:
4555 return int(metageneration)
4556
4557 @property
4558 def owner(self):
4559 """Retrieve info about the owner of the object.
4560
4561 See https://cloud.google.com/storage/docs/json_api/v1/objects
4562
4563 :rtype: dict or ``NoneType``
4564 :returns: Mapping of owner's role/ID, or ``None`` if the blob's
4565 resource has not been loaded from the server.
4566 """
4567 return copy.deepcopy(self._properties.get("owner"))
4568
4569 @property
4570 def retention_expiration_time(self):
4571 """Retrieve timestamp at which the object's retention period expires.
4572
4573 See https://cloud.google.com/storage/docs/json_api/v1/objects
4574
4575 :rtype: :class:`datetime.datetime` or ``NoneType``
4576 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4577 ``None`` if the property is not set locally.
4578 """
4579 value = self._properties.get("retentionExpirationTime")
4580 if value is not None:
4581 return _rfc3339_nanos_to_datetime(value)
4582
4583 @property
4584 def self_link(self):
4585 """Retrieve the URI for the object.
4586
4587 See https://cloud.google.com/storage/docs/json_api/v1/objects
4588
4589 :rtype: str or ``NoneType``
4590 :returns: The self link for the blob or ``None`` if the blob's
4591 resource has not been loaded from the server.
4592 """
4593 return self._properties.get("selfLink")
4594
4595 @property
4596 def size(self):
4597 """Size of the object, in bytes.
4598
4599 See https://cloud.google.com/storage/docs/json_api/v1/objects
4600
4601 :rtype: int or ``NoneType``
4602 :returns: The size of the blob or ``None`` if the blob's
4603 resource has not been loaded from the server.
4604 """
4605 size = self._properties.get("size")
4606 if size is not None:
4607 return int(size)
4608
4609 @property
4610 def kms_key_name(self):
4611 """Resource name of Cloud KMS key used to encrypt the blob's contents.
4612
4613 :rtype: str or ``NoneType``
4614 :returns:
4615 The resource name or ``None`` if no Cloud KMS key was used,
4616 or the blob's resource has not been loaded from the server.
4617 """
4618 return self._properties.get("kmsKeyName")
4619
4620 @kms_key_name.setter
4621 def kms_key_name(self, value):
4622 """Set KMS encryption key for object.
4623
4624 :type value: str or ``NoneType``
4625 :param value: new KMS key name (None to clear any existing key).
4626 """
4627 self._patch_property("kmsKeyName", value)
4628
4629 storage_class = _scalar_property("storageClass")
4630 """Retrieve the storage class for the object.
4631
4632 This can only be set at blob / object **creation** time. If you'd
4633 like to change the storage class **after** the blob / object already
4634 exists in a bucket, call :meth:`update_storage_class` (which uses
4635 :meth:`rewrite`).
4636
4637 See https://cloud.google.com/storage/docs/storage-classes
4638
4639 :rtype: str or ``NoneType``
4640 :returns:
4641 If set, one of
4642 :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
4643 :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
4644 :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
4645 :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
4646 :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
4647 :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`,
4648 :attr:`~google.cloud.storage.constants.DURABLE_REDUCED_AVAILABILITY_STORAGE_CLASS`,
4649 else ``None``.
4650 """
4651
4652 temporary_hold = _scalar_property("temporaryHold")
4653 """Is a temporary hold active on the object?
4654
4655 See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4656
4657 If the property is not set locally, returns :data:`None`.
4658
4659 :rtype: bool or ``NoneType``
4660 """
4661
4662 @property
4663 def time_deleted(self):
4664 """Retrieve the timestamp at which the object was deleted.
4665
4666 See https://cloud.google.com/storage/docs/json_api/v1/objects
4667
4668 :rtype: :class:`datetime.datetime` or ``NoneType``
4669 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4670 ``None`` if the blob's resource has not been loaded from
4671 the server (see :meth:`reload`). If the blob has
4672 not been deleted, this will never be set.
4673 """
4674 value = self._properties.get("timeDeleted")
4675 if value is not None:
4676 return _rfc3339_nanos_to_datetime(value)
4677
4678 @property
4679 def time_created(self):
4680 """Retrieve the timestamp at which the object was created.
4681
4682 See https://cloud.google.com/storage/docs/json_api/v1/objects
4683
4684 :rtype: :class:`datetime.datetime` or ``NoneType``
4685 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4686 ``None`` if the blob's resource has not been loaded from
4687 the server (see :meth:`reload`).
4688 """
4689 value = self._properties.get("timeCreated")
4690 if value is not None:
4691 return _rfc3339_nanos_to_datetime(value)
4692
4693 @property
4694 def updated(self):
4695 """Retrieve the timestamp at which the object was updated.
4696
4697 See https://cloud.google.com/storage/docs/json_api/v1/objects
4698
4699 :rtype: :class:`datetime.datetime` or ``NoneType``
4700 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4701 ``None`` if the blob's resource has not been loaded from
4702 the server (see :meth:`reload`).
4703 """
4704 value = self._properties.get("updated")
4705 if value is not None:
4706 return _rfc3339_nanos_to_datetime(value)
4707
4708 @property
4709 def custom_time(self):
4710 """Retrieve the custom time for the object.
4711
4712 See https://cloud.google.com/storage/docs/json_api/v1/objects
4713
4714 :rtype: :class:`datetime.datetime` or ``NoneType``
4715 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4716 ``None`` if the blob's resource has not been loaded from
4717 the server (see :meth:`reload`).
4718 """
4719 value = self._properties.get("customTime")
4720 if value is not None:
4721 return _rfc3339_nanos_to_datetime(value)
4722
4723 @custom_time.setter
4724 def custom_time(self, value):
4725 """Set the custom time for the object.
4726
4727 Once set on the server side object, this value can't be unset, but may
4728 only changed to a custom datetime in the future.
4729
4730 If :attr:`custom_time` must be unset, either perform a rewrite
4731 operation or upload the data again.
4732
4733 See https://cloud.google.com/storage/docs/json_api/v1/objects
4734
4735 :type value: :class:`datetime.datetime`
4736 :param value: new value
4737 """
4738 if value is not None:
4739 value = _datetime_to_rfc3339(value)
4740
4741 self._patch_property("customTime", value)
4742
4743 @property
4744 def retention(self):
4745 """Retrieve the retention configuration for this object.
4746
4747 :rtype: :class:`Retention`
4748 :returns: an instance for managing the object's retention configuration.
4749 """
4750 info = self._properties.get("retention", {})
4751 return Retention.from_api_repr(info, self)
4752
4753 @property
4754 def soft_delete_time(self):
4755 """If this object has been soft-deleted, returns the time at which it became soft-deleted.
4756
4757 :rtype: :class:`datetime.datetime` or ``NoneType``
4758 :returns:
4759 (readonly) The time that the object became soft-deleted.
4760 Note this property is only set for soft-deleted objects.
4761 """
4762 soft_delete_time = self._properties.get("softDeleteTime")
4763 if soft_delete_time is not None:
4764 return _rfc3339_nanos_to_datetime(soft_delete_time)
4765
4766 @property
4767 def hard_delete_time(self):
4768 """If this object has been soft-deleted, returns the time at which it will be permanently deleted.
4769
4770 :rtype: :class:`datetime.datetime` or ``NoneType``
4771 :returns:
4772 (readonly) The time that the object will be permanently deleted.
4773 Note this property is only set for soft-deleted objects.
4774 """
4775 hard_delete_time = self._properties.get("hardDeleteTime")
4776 if hard_delete_time is not None:
4777 return _rfc3339_nanos_to_datetime(hard_delete_time)
4778
4779
4780def _get_host_name(connection):
4781 """Returns the host name from the given connection.
4782
4783 :type connection: :class:`~google.cloud.storage._http.Connection`
4784 :param connection: The connection object.
4785
4786 :rtype: str
4787 :returns: The host name.
4788 """
4789 # TODO: After google-cloud-core 1.6.0 is stable and we upgrade it
4790 # to 1.6.0 in setup.py, we no longer need to check the attribute
4791 # existence. We can simply return connection.get_api_base_url_for_mtls().
4792 return (
4793 connection.API_BASE_URL
4794 if not hasattr(connection, "get_api_base_url_for_mtls")
4795 else connection.get_api_base_url_for_mtls()
4796 )
4797
4798
4799def _get_encryption_headers(key, source=False):
4800 """Builds customer encryption key headers
4801
4802 :type key: bytes
4803 :param key: 32 byte key to build request key and hash.
4804
4805 :type source: bool
4806 :param source: If true, return headers for the "source" blob; otherwise,
4807 return headers for the "destination" blob.
4808
4809 :rtype: dict
4810 :returns: dict of HTTP headers being sent in request.
4811 """
4812 if key is None:
4813 return {}
4814
4815 key = _to_bytes(key)
4816 key_hash = hashlib.sha256(key).digest()
4817 key_hash = base64.b64encode(key_hash)
4818 key = base64.b64encode(key)
4819
4820 if source:
4821 prefix = "X-Goog-Copy-Source-Encryption-"
4822 else:
4823 prefix = "X-Goog-Encryption-"
4824
4825 return {
4826 prefix + "Algorithm": "AES256",
4827 prefix + "Key": _bytes_to_unicode(key),
4828 prefix + "Key-Sha256": _bytes_to_unicode(key_hash),
4829 }
4830
4831
4832def _quote(value, safe=b"~"):
4833 """URL-quote a string.
4834
4835 If the value is unicode, this method first UTF-8 encodes it as bytes and
4836 then quotes the bytes. (In Python 3, ``urllib.parse.quote`` does this
4837 encoding automatically, but in Python 2, non-ASCII characters cannot be
4838 quoted.)
4839
4840 :type value: str or bytes
4841 :param value: The value to be URL-quoted.
4842
4843 :type safe: bytes
4844 :param safe: Bytes *not* to be quoted. By default, includes only ``b'~'``.
4845
4846 :rtype: str
4847 :returns: The encoded value (bytes in Python 2, unicode in Python 3).
4848 """
4849 value = _to_bytes(value, encoding="utf-8")
4850 return quote(value, safe=safe)
4851
4852
4853def _maybe_rewind(stream, rewind=False):
4854 """Rewind the stream if desired.
4855
4856 :type stream: IO[bytes]
4857 :param stream: A bytes IO object open for reading.
4858
4859 :type rewind: bool
4860 :param rewind: Indicates if we should seek to the beginning of the stream.
4861 """
4862 if rewind:
4863 stream.seek(0, os.SEEK_SET)
4864
4865
4866def _raise_from_invalid_response(error):
4867 """Re-wrap and raise an ``InvalidResponse`` exception.
4868
4869 :type error: :exc:`google.cloud.storage.exceptions.InvalidResponse`
4870 :param error: A caught exception from the ``google-resumable-media``
4871 library.
4872
4873 :raises: :class:`~google.cloud.exceptions.GoogleCloudError` corresponding
4874 to the failed status code
4875 """
4876 response = error.response
4877
4878 # The 'response.text' gives the actual reason of error, where 'error' gives
4879 # the message of expected status code.
4880 if response.text:
4881 error_message = response.text + ": " + str(error)
4882 else:
4883 error_message = str(error)
4884
4885 message = f"{response.request.method} {response.request.url}: {error_message}"
4886
4887 raise exceptions.from_http_status(response.status_code, message, response=response)
4888
4889
4890def _add_query_parameters(base_url, name_value_pairs):
4891 """Add one query parameter to a base URL.
4892
4893 :type base_url: string
4894 :param base_url: Base URL (may already contain query parameters)
4895
4896 :type name_value_pairs: list of (string, string) tuples.
4897 :param name_value_pairs: Names and values of the query parameters to add
4898
4899 :rtype: string
4900 :returns: URL with additional query strings appended.
4901 """
4902 if len(name_value_pairs) == 0:
4903 return base_url
4904
4905 scheme, netloc, path, query, frag = urlsplit(base_url)
4906 query = parse_qsl(query)
4907 query.extend(name_value_pairs)
4908 return urlunsplit((scheme, netloc, path, urlencode(query), frag))
4909
4910
4911class Retention(dict):
4912 """Map an object's retention configuration.
4913
4914 :type blob: :class:`Blob`
4915 :params blob: blob for which this retention configuration applies to.
4916
4917 :type mode: str or ``NoneType``
4918 :params mode:
4919 (Optional) The mode of the retention configuration, which can be either Unlocked or Locked.
4920 See: https://cloud.google.com/storage/docs/object-lock
4921
4922 :type retain_until_time: :class:`datetime.datetime` or ``NoneType``
4923 :params retain_until_time:
4924 (Optional) The earliest time that the object can be deleted or replaced, which is the
4925 retention configuration set for this object.
4926
4927 :type retention_expiration_time: :class:`datetime.datetime` or ``NoneType``
4928 :params retention_expiration_time:
4929 (Optional) The earliest time that the object can be deleted, which depends on any
4930 retention configuration set for the object and any retention policy set for the bucket
4931 that contains the object. This value should normally only be set by the back-end API.
4932 """
4933
4934 def __init__(
4935 self,
4936 blob,
4937 mode=None,
4938 retain_until_time=None,
4939 retention_expiration_time=None,
4940 ):
4941 data = {"mode": mode}
4942 if retain_until_time is not None:
4943 retain_until_time = _datetime_to_rfc3339(retain_until_time)
4944 data["retainUntilTime"] = retain_until_time
4945
4946 if retention_expiration_time is not None:
4947 retention_expiration_time = _datetime_to_rfc3339(retention_expiration_time)
4948 data["retentionExpirationTime"] = retention_expiration_time
4949
4950 super(Retention, self).__init__(data)
4951 self._blob = blob
4952
4953 @classmethod
4954 def from_api_repr(cls, resource, blob):
4955 """Factory: construct instance from resource.
4956
4957 :type blob: :class:`Blob`
4958 :params blob: Blob for which this retention configuration applies to.
4959
4960 :type resource: dict
4961 :param resource: mapping as returned from API call.
4962
4963 :rtype: :class:`Retention`
4964 :returns: Retention configuration created from resource.
4965 """
4966 instance = cls(blob)
4967 instance.update(resource)
4968 return instance
4969
4970 @property
4971 def blob(self):
4972 """Blob for which this retention configuration applies to.
4973
4974 :rtype: :class:`Blob`
4975 :returns: the instance's blob.
4976 """
4977 return self._blob
4978
4979 @property
4980 def mode(self):
4981 """The mode of the retention configuration. Options are 'Unlocked' or 'Locked'.
4982
4983 :rtype: string
4984 :returns: The mode of the retention configuration, which can be either set to 'Unlocked' or 'Locked'.
4985 """
4986 return self.get("mode")
4987
4988 @mode.setter
4989 def mode(self, value):
4990 self["mode"] = value
4991 self.blob._patch_property("retention", self)
4992
4993 @property
4994 def retain_until_time(self):
4995 """The earliest time that the object can be deleted or replaced, which is the
4996 retention configuration set for this object.
4997
4998 :rtype: :class:`datetime.datetime` or ``NoneType``
4999 :returns: Datetime object parsed from RFC3339 valid timestamp, or
5000 ``None`` if the blob's resource has not been loaded from
5001 the server (see :meth:`reload`).
5002 """
5003 value = self.get("retainUntilTime")
5004 if value is not None:
5005 return _rfc3339_nanos_to_datetime(value)
5006
5007 @retain_until_time.setter
5008 def retain_until_time(self, value):
5009 """Set the retain_until_time for the object retention configuration.
5010
5011 :type value: :class:`datetime.datetime`
5012 :param value: The earliest time that the object can be deleted or replaced.
5013 """
5014 if value is not None:
5015 value = _datetime_to_rfc3339(value)
5016 self["retainUntilTime"] = value
5017 self.blob._patch_property("retention", self)
5018
5019 @property
5020 def retention_expiration_time(self):
5021 """The earliest time that the object can be deleted, which depends on any
5022 retention configuration set for the object and any retention policy set for
5023 the bucket that contains the object.
5024
5025 :rtype: :class:`datetime.datetime` or ``NoneType``
5026 :returns:
5027 (readonly) The earliest time that the object can be deleted.
5028 """
5029 retention_expiration_time = self.get("retentionExpirationTime")
5030 if retention_expiration_time is not None:
5031 return _rfc3339_nanos_to_datetime(retention_expiration_time)