1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15# pylint: disable=too-many-lines
16
17"""Create / interact with Google Cloud Storage blobs."""
18
19import base64
20import copy
21import hashlib
22from io import BytesIO
23from io import TextIOWrapper
24import logging
25import mimetypes
26import os
27import re
28from email.parser import HeaderParser
29from urllib.parse import parse_qsl
30from urllib.parse import quote
31from urllib.parse import urlencode
32from urllib.parse import urlsplit
33from urllib.parse import urlunsplit
34import warnings
35
36from google.cloud.storage._media.requests import ChunkedDownload
37from google.cloud.storage._media.requests import Download
38from google.cloud.storage._media.requests import RawDownload
39from google.cloud.storage._media.requests import RawChunkedDownload
40from google.cloud.storage._media.requests import MultipartUpload
41from google.cloud.storage._media.requests import ResumableUpload
42
43from google.api_core.iam import Policy
44from google.cloud import exceptions
45from google.cloud._helpers import _bytes_to_unicode
46from google.cloud._helpers import _datetime_to_rfc3339
47from google.cloud._helpers import _rfc3339_nanos_to_datetime
48from google.cloud._helpers import _to_bytes
49from google.cloud.exceptions import NotFound
50from google.cloud.storage._helpers import _add_etag_match_headers
51from google.cloud.storage._helpers import _add_generation_match_parameters
52from google.cloud.storage._helpers import _PropertyMixin
53from google.cloud.storage._helpers import _scalar_property
54from google.cloud.storage._helpers import _bucket_bound_hostname_url
55from google.cloud.storage._helpers import _raise_if_more_than_one_set
56from google.cloud.storage._helpers import _get_default_headers
57from google.cloud.storage._helpers import _get_default_storage_base_url
58from google.cloud.storage._signing import generate_signed_url_v2
59from google.cloud.storage._signing import generate_signed_url_v4
60from google.cloud.storage._helpers import _API_VERSION
61from google.cloud.storage._helpers import _virtual_hosted_style_base_url
62from google.cloud.storage._opentelemetry_tracing import create_trace_span
63from google.cloud.storage.acl import ACL
64from google.cloud.storage.acl import ObjectACL
65from google.cloud.storage.constants import _DEFAULT_TIMEOUT
66from google.cloud.storage.constants import ARCHIVE_STORAGE_CLASS
67from google.cloud.storage.constants import COLDLINE_STORAGE_CLASS
68from google.cloud.storage.constants import MULTI_REGIONAL_LEGACY_STORAGE_CLASS
69from google.cloud.storage.constants import NEARLINE_STORAGE_CLASS
70from google.cloud.storage.constants import REGIONAL_LEGACY_STORAGE_CLASS
71from google.cloud.storage.constants import STANDARD_STORAGE_CLASS
72from google.cloud.storage.exceptions import DataCorruption
73from google.cloud.storage.exceptions import InvalidResponse
74from google.cloud.storage.retry import ConditionalRetryPolicy
75from google.cloud.storage.retry import DEFAULT_RETRY
76from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON
77from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED
78from google.cloud.storage.fileio import BlobReader
79from google.cloud.storage.fileio import BlobWriter
80
81
82_DEFAULT_CONTENT_TYPE = "application/octet-stream"
83_DOWNLOAD_URL_TEMPLATE = "{hostname}/download/storage/{api_version}{path}?alt=media"
84_BASE_UPLOAD_TEMPLATE = (
85 "{hostname}/upload/storage/{api_version}{bucket_path}/o?uploadType="
86)
87_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
88_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"
89# NOTE: "acl" is also writeable but we defer ACL management to
90# the classes in the google.cloud.storage.acl module.
91_CONTENT_TYPE_FIELD = "contentType"
92_WRITABLE_FIELDS = (
93 "cacheControl",
94 "contentDisposition",
95 "contentEncoding",
96 "contentLanguage",
97 _CONTENT_TYPE_FIELD,
98 "crc32c",
99 "customTime",
100 "md5Hash",
101 "metadata",
102 "name",
103 "retention",
104 "storageClass",
105)
106_READ_LESS_THAN_SIZE = (
107 "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining."
108)
109_CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE = (
110 "A checksum of type `{}` was requested, but checksumming is not available "
111 "for downloads when chunk_size is set."
112)
_COMPOSE_IF_GENERATION_LIST_DEPRECATED = (
    "'if_generation_match: type list' is deprecated and supported for "
    "backwards-compatibility reasons only. Use 'if_source_generation_match' "
    "instead to match source objects' generations."
)
_COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR = (
    "Use 'if_generation_match' to match the generation of the destination "
    "object by passing in a generation number, instead of a list. "
    "Use 'if_source_generation_match' to match source objects' generations."
)
_COMPOSE_IF_METAGENERATION_LIST_DEPRECATED = (
    "'if_metageneration_match: type list' is deprecated and supported for "
    "backwards-compatibility reasons only. Note that the metageneration to "
    "be matched is that of the destination blob. Please pass in a single "
    "value (type long)."
)
129_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR = (
130 "'if_source_generation_match' length must be the same as 'sources' length"
131)
132_DOWNLOAD_AS_STRING_DEPRECATED = (
133 "Blob.download_as_string() is deprecated and will be removed in future. "
134 "Use Blob.download_as_bytes() instead."
135)
136_FROM_STRING_DEPRECATED = (
137 "Blob.from_string() is deprecated and will be removed in future. "
138 "Use Blob.from_uri() instead."
139)
140_GS_URL_REGEX_PATTERN = re.compile(
141 r"(?P<scheme>gs)://(?P<bucket_name>[a-z0-9_.-]+)/(?P<object_name>.+)"
142)
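# For example, "gs://my-bucket/path/to/object.txt" (an illustrative URI) yields
# bucket_name "my-bucket" and object_name "path/to/object.txt".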
143
144_DEFAULT_CHUNKSIZE = 104857600 # 1024 * 1024 B * 100 = 100 MiB
145_MAX_MULTIPART_SIZE = 8388608 # 8 MiB
146
147_logger = logging.getLogger(__name__)
148
149
150class Blob(_PropertyMixin):
151 """A wrapper around Cloud Storage's concept of an ``Object``.
152
153 :type name: str
154 :param name: The name of the blob. This corresponds to the unique path of
155 the object in the bucket. If bytes, will be converted to a
156 unicode object. Blob / object names can contain any sequence
157 of valid unicode characters, of length 1-1024 bytes when
158 UTF-8 encoded.
159
160 :type bucket: :class:`google.cloud.storage.bucket.Bucket`
161 :param bucket: The bucket to which this blob belongs.
162
    :type chunk_size: int
    :param chunk_size:
        (Optional) The size of a chunk of data whenever iterating (in bytes).
        This must be a multiple of 256 KB per the API specification. If not
        specified, the chunk_size of the blob itself is used. If that is not
        specified, a default value of 100 MB is used.
169
170 :type encryption_key: bytes
171 :param encryption_key:
172 (Optional) 32 byte encryption key for customer-supplied encryption.
173 See https://cloud.google.com/storage/docs/encryption#customer-supplied.
174
175 :type kms_key_name: str
176 :param kms_key_name:
177 (Optional) Resource name of Cloud KMS key used to encrypt the blob's
178 contents.
179
180 :type generation: long
181 :param generation:
182 (Optional) If present, selects a specific revision of this object.
183
    :type crc32c_checksum: str
    :param crc32c_checksum:
        (Optional) If set, the CRC32C checksum of the blob's content, as
        described in RFC 4960, Appendix B, encoded using base64 in big-endian
        byte order. See
        Appendix B: https://datatracker.ietf.org/doc/html/rfc4960#appendix-B
        base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
    """
192
193 _chunk_size = None # Default value for each instance.
194 _CHUNK_SIZE_MULTIPLE = 256 * 1024
195 """Number (256 KB, in bytes) that must divide the chunk size."""
196
197 STORAGE_CLASSES = (
198 STANDARD_STORAGE_CLASS,
199 NEARLINE_STORAGE_CLASS,
200 COLDLINE_STORAGE_CLASS,
201 ARCHIVE_STORAGE_CLASS,
202 MULTI_REGIONAL_LEGACY_STORAGE_CLASS,
203 REGIONAL_LEGACY_STORAGE_CLASS,
204 )
205 """Allowed values for :attr:`storage_class`.
206
207 See
208 https://cloud.google.com/storage/docs/json_api/v1/objects#storageClass
209 https://cloud.google.com/storage/docs/per-object-storage-class
210
211 .. note::
212 This list does not include 'DURABLE_REDUCED_AVAILABILITY', which
213 is only documented for buckets (and deprecated).
214 """
215
216 def __init__(
217 self,
218 name,
219 bucket,
220 chunk_size=None,
221 encryption_key=None,
222 kms_key_name=None,
223 generation=None,
224 crc32c_checksum=None,
225 ):
226 """
227 property :attr:`name`
228 Get the blob's name.
229 """
230 name = _bytes_to_unicode(name)
231 super(Blob, self).__init__(name=name)
232
233 self.chunk_size = chunk_size # Check that setter accepts value.
234 self._bucket = bucket
235 self._acl = ObjectACL(self)
236 _raise_if_more_than_one_set(
237 encryption_key=encryption_key, kms_key_name=kms_key_name
238 )
239
240 self._encryption_key = encryption_key
241
242 if kms_key_name is not None:
243 self._properties["kmsKeyName"] = kms_key_name
244
245 if generation is not None:
246 self._properties["generation"] = generation
247
248 if crc32c_checksum is not None:
249 self._properties["crc32c"] = crc32c_checksum
250
251 @property
252 def bucket(self):
253 """Bucket which contains the object.
254
255 :rtype: :class:`~google.cloud.storage.bucket.Bucket`
256 :returns: The object's bucket.
257 """
258 return self._bucket
259
260 @property
261 def chunk_size(self):
262 """Get the blob's default chunk size.
263
264 :rtype: int or ``NoneType``
265 :returns: The current blob's chunk size, if it is set.
266 """
267 return self._chunk_size
268
269 @chunk_size.setter
270 def chunk_size(self, value):
271 """Set the blob's default chunk size.
272
273 :type value: int
274 :param value: (Optional) The current blob's chunk size, if it is set.
275
276 :raises: :class:`ValueError` if ``value`` is not ``None`` and is not a
277 multiple of 256 KB.
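
        For example, any multiple of 256 KB is accepted (an illustrative value):

        .. code-block:: python

            blob.chunk_size = 5 * 256 * 1024  # 1.25 MiB, a valid multiple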
278 """
279 if value is not None and value > 0 and value % self._CHUNK_SIZE_MULTIPLE != 0:
280 raise ValueError(
281 "Chunk size must be a multiple of %d." % (self._CHUNK_SIZE_MULTIPLE,)
282 )
283 self._chunk_size = value
284
285 @property
286 def encryption_key(self):
287 """Retrieve the customer-supplied encryption key for the object.
288
289 :rtype: bytes or ``NoneType``
290 :returns:
291 The encryption key or ``None`` if no customer-supplied encryption key was used,
292 or the blob's resource has not been loaded from the server.
293 """
294 return self._encryption_key
295
296 @encryption_key.setter
297 def encryption_key(self, value):
298 """Set the blob's encryption key.
299
300 See https://cloud.google.com/storage/docs/encryption#customer-supplied
301
302 To perform a key rotation for an encrypted blob, use :meth:`rewrite`.
303 See https://cloud.google.com/storage/docs/encryption/using-customer-supplied-keys?hl=ca#rotating
304
305 :type value: bytes
306 :param value: 32 byte encryption key for customer-supplied encryption.
307 """
308 self._encryption_key = value
309
310 @staticmethod
311 def path_helper(bucket_path, blob_name):
312 """Relative URL path for a blob.
313
314 :type bucket_path: str
315 :param bucket_path: The URL path for a bucket.
316
317 :type blob_name: str
318 :param blob_name: The name of the blob.
319
320 :rtype: str
321 :returns: The relative URL path for ``blob_name``.
322 """
323 return bucket_path + "/o/" + _quote(blob_name)
324
325 @property
326 def acl(self):
327 """Create our ACL on demand."""
328 return self._acl
329
330 def __repr__(self):
331 if self.bucket:
332 bucket_name = self.bucket.name
333 else:
334 bucket_name = None
335
336 return f"<Blob: {bucket_name}, {self.name}, {self.generation}>"
337
338 @property
339 def path(self):
340 """Getter property for the URL path to this Blob.
341
342 :rtype: str
343 :returns: The URL path to this Blob.
344 """
345 if not self.name:
346 raise ValueError("Cannot determine path without a blob name.")
347
348 return self.path_helper(self.bucket.path, self.name)
349
350 @property
351 def client(self):
352 """The client bound to this blob."""
353 return self.bucket.client
354
355 @property
356 def user_project(self):
357 """Project ID billed for API requests made via this blob.
358
359 Derived from bucket's value.
360
361 :rtype: str
362 """
363 return self.bucket.user_project
364
365 def _encryption_headers(self):
366 """Return any encryption headers needed to fetch the object.
367
368 :rtype: List(Tuple(str, str))
369 :returns: a list of tuples to be passed as headers.
370 """
371 return _get_encryption_headers(self._encryption_key)
372
373 @property
374 def _query_params(self):
375 """Default query parameters."""
376 params = {}
377 if self.generation is not None:
378 params["generation"] = self.generation
379 if self.user_project is not None:
380 params["userProject"] = self.user_project
381 return params
382
383 @property
384 def public_url(self):
385 """The public URL for this blob.
386
387 Use :meth:`make_public` to enable anonymous access via the returned
388 URL.
389
        :rtype: str
391 :returns: The public URL for this blob.
392 """
393 if self.client:
394 endpoint = self.client.api_endpoint
395 else:
396 endpoint = _get_default_storage_base_url()
397 return "{storage_base_url}/{bucket_name}/{quoted_name}".format(
398 storage_base_url=endpoint,
399 bucket_name=self.bucket.name,
400 quoted_name=_quote(self.name, safe=b"/~"),
401 )
402
403 @classmethod
404 def from_uri(cls, uri, client=None):
405 """Get a constructor for blob object by URI.
406
407 .. code-block:: python
408
409 from google.cloud import storage
410 from google.cloud.storage.blob import Blob
411 client = storage.Client()
412 blob = Blob.from_uri("gs://bucket/object", client=client)
413
        :type uri: str
        :param uri: The blob URI following a gs://bucket/object pattern.
            Both a bucket and an object name are required to construct a blob
            object.
417
418 :type client: :class:`~google.cloud.storage.client.Client`
419 :param client:
420 (Optional) The client to use. Application code should
421 *always* pass ``client``.
422
423 :rtype: :class:`google.cloud.storage.blob.Blob`
424 :returns: The blob object created.
425 """
426 from google.cloud.storage.bucket import Bucket
427
428 match = _GS_URL_REGEX_PATTERN.match(uri)
429 if not match:
430 raise ValueError("URI pattern must be gs://bucket/object")
431 bucket = Bucket(client, name=match.group("bucket_name"))
432 return cls(match.group("object_name"), bucket)
433
434 @classmethod
435 def from_string(cls, uri, client=None):
436 """(Deprecated) Get a constructor for blob object by URI.
437
438 .. note::
439 Deprecated alias for :meth:`from_uri`.
440
441 .. code-block:: python
442
443 from google.cloud import storage
444 from google.cloud.storage.blob import Blob
445 client = storage.Client()
446 blob = Blob.from_string("gs://bucket/object", client=client)
447
        :type uri: str
        :param uri: The blob URI following a gs://bucket/object pattern.
            Both a bucket and an object name are required to construct a blob
            object.
451
452 :type client: :class:`~google.cloud.storage.client.Client`
453 :param client:
454 (Optional) The client to use. Application code should
455 *always* pass ``client``.
456
457 :rtype: :class:`google.cloud.storage.blob.Blob`
458 :returns: The blob object created.
459 """
460 warnings.warn(_FROM_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2)
461 return Blob.from_uri(uri=uri, client=client)
462
463 def generate_signed_url(
464 self,
465 expiration=None,
466 api_access_endpoint=None,
467 method="GET",
468 content_md5=None,
469 content_type=None,
470 response_disposition=None,
471 response_type=None,
472 generation=None,
473 headers=None,
474 query_parameters=None,
475 client=None,
476 credentials=None,
477 version=None,
478 service_account_email=None,
479 access_token=None,
480 virtual_hosted_style=False,
481 bucket_bound_hostname=None,
482 scheme="http",
483 ):
484 """Generates a signed URL for this blob.
485
486 .. note::
487
488 If you are on Google Compute Engine, you can't generate a signed
489 URL using GCE service account.
490 If you'd like to be able to generate a signed URL from GCE,
491 you can use a standard service account from a JSON file rather
492 than a GCE service account.
493
494 If you have a blob that you want to allow access to for a set
495 amount of time, you can use this method to generate a URL that
496 is only valid within a certain time period.
497
498 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-generate-signed-url-v4#storage_generate_signed_url_v4-python).
499
500 This is particularly useful if you don't want publicly
501 accessible blobs, but don't want to require users to explicitly
502 log in.
503
        If ``bucket_bound_hostname`` is used instead of the default
        ``api_access_endpoint``, ``https`` works only when serving the bucket
        through a CDN.
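
        For example, a minimal sketch (the bucket and object names here are
        placeholders):

        .. code-block:: python

            import datetime

            from google.cloud import storage

            client = storage.Client()
            blob = client.bucket("my-bucket").blob("my-object")
            url = blob.generate_signed_url(
                version="v4",
                expiration=datetime.timedelta(minutes=15),
                method="GET",
            )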
506
507 :type expiration: Union[Integer, datetime.datetime, datetime.timedelta]
508 :param expiration:
509 Point in time when the signed URL should expire. If a ``datetime``
510 instance is passed without an explicit ``tzinfo`` set, it will be
511 assumed to be ``UTC``.
512
513 :type api_access_endpoint: str
514 :param api_access_endpoint: (Optional) URI base, for instance
515 "https://storage.googleapis.com". If not specified, the client's
516 api_endpoint will be used. Incompatible with bucket_bound_hostname.
517
518 :type method: str
519 :param method: The HTTP verb that will be used when requesting the URL.
520
521 :type content_md5: str
522 :param content_md5:
523 (Optional) The MD5 hash of the object referenced by ``resource``.
524
525 :type content_type: str
526 :param content_type:
527 (Optional) The content type of the object referenced by
528 ``resource``.
529
        :type response_disposition: str
        :param response_disposition:
            (Optional) Content disposition of responses to requests for the
            signed URL. For example, to enable the signed URL to initiate a
            download of a file saved as ``blob.png``, use the value
            ``'attachment; filename=blob.png'``.
536
537 :type response_type: str
538 :param response_type:
539 (Optional) Content type of responses to requests for the signed
540 URL. Ignored if content_type is set on object/blob metadata.
541
542 :type generation: str
543 :param generation:
544 (Optional) A value that indicates which generation of the resource
545 to fetch.
546
547 :type headers: dict
548 :param headers:
549 (Optional) Additional HTTP headers to be included as part of the
550 signed URLs. See:
551 https://cloud.google.com/storage/docs/xml-api/reference-headers
552 Requests using the signed URL *must* pass the specified header
553 (name and value) with each request for the URL.
554
555 :type query_parameters: dict
556 :param query_parameters:
557 (Optional) Additional query parameters to be included as part of the
558 signed URLs. See:
559 https://cloud.google.com/storage/docs/xml-api/reference-headers#query
560
561 :type client: :class:`~google.cloud.storage.client.Client`
562 :param client:
563 (Optional) The client to use. If not passed, falls back to the
564 ``client`` stored on the blob's bucket.
565
566 :type credentials: :class:`google.auth.credentials.Credentials`
567 :param credentials:
568 (Optional) The authorization credentials to attach to requests.
569 These credentials identify this application to the service. If
570 none are specified, the client will attempt to ascertain the
571 credentials from the environment.
572
573 :type version: str
574 :param version:
575 (Optional) The version of signed credential to create. Must be one
576 of 'v2' | 'v4'.
577
578 :type service_account_email: str
579 :param service_account_email:
580 (Optional) E-mail address of the service account.
581
582 :type access_token: str
583 :param access_token: (Optional) Access token for a service account.
584
        :type virtual_hosted_style: bool
        :param virtual_hosted_style:
            (Optional) If true, then construct the URL relative to the bucket's
            virtual hostname, e.g., '<bucket-name>.storage.googleapis.com'.
            Incompatible with bucket_bound_hostname.
590
        :type bucket_bound_hostname: str
        :param bucket_bound_hostname:
            (Optional) If passed, then construct the URL relative to the
            bucket-bound hostname. The value can be a bare hostname or include
            a scheme, e.g., 'example.com' or 'http://example.com'.
            Incompatible with api_access_endpoint and virtual_hosted_style.
            See: https://cloud.google.com/storage/docs/request-endpoints#cname
597
598 :type scheme: str
599 :param scheme:
600 (Optional) If ``bucket_bound_hostname`` is passed as a bare
601 hostname, use this value as the scheme. ``https`` will work only
602 when using a CDN. Defaults to ``"http"``.
603
604 :raises: :exc:`ValueError` when version is invalid or mutually exclusive arguments are used.
605 :raises: :exc:`TypeError` when expiration is not a valid type.
606 :raises: :exc:`AttributeError` if credentials is not an instance
607 of :class:`google.auth.credentials.Signing`.
608
609 :rtype: str
610 :returns: A signed URL you can use to access the resource
611 until expiration.
612 """
613 if version is None:
614 version = "v2"
615 elif version not in ("v2", "v4"):
616 raise ValueError("'version' must be either 'v2' or 'v4'")
617
618 if (
619 api_access_endpoint is not None or virtual_hosted_style
620 ) and bucket_bound_hostname:
621 raise ValueError(
622 "The bucket_bound_hostname argument is not compatible with "
623 "either api_access_endpoint or virtual_hosted_style."
624 )
625
626 if api_access_endpoint is None:
627 client = self._require_client(client)
628 api_access_endpoint = client.api_endpoint
629
630 quoted_name = _quote(self.name, safe=b"/~")
631
632 # If you are on Google Compute Engine, you can't generate a signed URL
633 # using GCE service account.
634 # See https://github.com/googleapis/google-auth-library-python/issues/50
635 if virtual_hosted_style:
636 api_access_endpoint = _virtual_hosted_style_base_url(
637 api_access_endpoint, self.bucket.name
638 )
639 resource = f"/{quoted_name}"
640 elif bucket_bound_hostname:
641 api_access_endpoint = _bucket_bound_hostname_url(
642 bucket_bound_hostname, scheme
643 )
644 resource = f"/{quoted_name}"
645 else:
646 resource = f"/{self.bucket.name}/{quoted_name}"
647
648 if credentials is None:
649 client = self._require_client(client) # May be redundant, but that's ok.
650 credentials = client._credentials
651
652 client = self._require_client(client)
653 universe_domain = client.universe_domain
654
655 if version == "v2":
656 helper = generate_signed_url_v2
657 else:
658 helper = generate_signed_url_v4
659
660 if self._encryption_key is not None:
661 encryption_headers = _get_encryption_headers(self._encryption_key)
662 if headers is None:
663 headers = {}
664 if version == "v2":
665 # See: https://cloud.google.com/storage/docs/access-control/signed-urls-v2#about-canonical-extension-headers
666 v2_copy_only = "X-Goog-Encryption-Algorithm"
667 headers[v2_copy_only] = encryption_headers[v2_copy_only]
668 else:
669 headers.update(encryption_headers)
670
671 return helper(
672 credentials,
673 resource=resource,
674 expiration=expiration,
675 api_access_endpoint=api_access_endpoint,
676 method=method.upper(),
677 content_md5=content_md5,
678 content_type=content_type,
679 response_type=response_type,
680 response_disposition=response_disposition,
681 generation=generation,
682 headers=headers,
683 query_parameters=query_parameters,
684 service_account_email=service_account_email,
685 access_token=access_token,
686 universe_domain=universe_domain,
687 )
688
689 def exists(
690 self,
691 client=None,
692 if_etag_match=None,
693 if_etag_not_match=None,
694 if_generation_match=None,
695 if_generation_not_match=None,
696 if_metageneration_match=None,
697 if_metageneration_not_match=None,
698 timeout=_DEFAULT_TIMEOUT,
699 retry=DEFAULT_RETRY,
700 soft_deleted=None,
701 ):
702 """Determines whether or not this blob exists.
703
704 If :attr:`user_project` is set on the bucket, bills the API request
705 to that project.
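
        For example, a minimal sketch (bucket and object names are placeholders):

        .. code-block:: python

            from google.cloud import storage

            client = storage.Client()
            blob = client.bucket("my-bucket").blob("my-object")
            if blob.exists():
                print("object is present")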
706
707 :type client: :class:`~google.cloud.storage.client.Client`
708 :param client:
709 (Optional) The client to use. If not passed, falls back to the
710 ``client`` stored on the blob's bucket.
711
712 :type if_etag_match: Union[str, Set[str]]
713 :param if_etag_match:
714 (Optional) See :ref:`using-if-etag-match`
715
716 :type if_etag_not_match: Union[str, Set[str]]
717 :param if_etag_not_match:
718 (Optional) See :ref:`using-if-etag-not-match`
719
720 :type if_generation_match: long
721 :param if_generation_match:
722 (Optional) See :ref:`using-if-generation-match`
723
724 :type if_generation_not_match: long
725 :param if_generation_not_match:
726 (Optional) See :ref:`using-if-generation-not-match`
727
728 :type if_metageneration_match: long
729 :param if_metageneration_match:
730 (Optional) See :ref:`using-if-metageneration-match`
731
732 :type if_metageneration_not_match: long
733 :param if_metageneration_not_match:
734 (Optional) See :ref:`using-if-metageneration-not-match`
735
736 :type timeout: float or tuple
737 :param timeout:
738 (Optional) The amount of time, in seconds, to wait
739 for the server response. See: :ref:`configuring_timeouts`
740
741 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
742 :param retry:
743 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
744
745 :type soft_deleted: bool
746 :param soft_deleted:
747 (Optional) If True, looks for a soft-deleted object. Will only return True
748 if the object exists and is in a soft-deleted state.
749 :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
750 See: https://cloud.google.com/storage/docs/soft-delete
751
752 :rtype: bool
753 :returns: True if the blob exists in Cloud Storage.
754 """
755 with create_trace_span(name="Storage.Blob.exists"):
756 client = self._require_client(client)
757 # We only need the status code (200 or not) so we seek to
758 # minimize the returned payload.
759 query_params = self._query_params
760 query_params["fields"] = "name"
761 if soft_deleted is not None:
762 query_params["softDeleted"] = soft_deleted
763
764 _add_generation_match_parameters(
765 query_params,
766 if_generation_match=if_generation_match,
767 if_generation_not_match=if_generation_not_match,
768 if_metageneration_match=if_metageneration_match,
769 if_metageneration_not_match=if_metageneration_not_match,
770 )
771
772 headers = {}
773 _add_etag_match_headers(
774 headers,
775 if_etag_match=if_etag_match,
776 if_etag_not_match=if_etag_not_match,
777 )
778
779 try:
780 # We intentionally pass `_target_object=None` since fields=name
781 # would limit the local properties.
782 client._get_resource(
783 self.path,
784 query_params=query_params,
785 headers=headers,
786 timeout=timeout,
787 retry=retry,
788 _target_object=None,
789 )
790 except NotFound:
791 # NOTE: This will not fail immediately in a batch. However, when
792 # Batch.finish() is called, the resulting `NotFound` will be
793 # raised.
794 return False
795 return True
796
797 def delete(
798 self,
799 client=None,
800 if_generation_match=None,
801 if_generation_not_match=None,
802 if_metageneration_match=None,
803 if_metageneration_not_match=None,
804 timeout=_DEFAULT_TIMEOUT,
805 retry=DEFAULT_RETRY,
806 ):
807 """Deletes a blob from Cloud Storage.
808
809 If :attr:`user_project` is set on the bucket, bills the API request
810 to that project.
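
        For example, an illustrative sketch (bucket and object names are
        placeholders):

        .. code-block:: python

            from google.cloud import storage

            client = storage.Client()
            blob = client.bucket("my-bucket").get_blob("my-object")
            if blob is not None:
                # Passing the loaded generation guards against racing writers.
                blob.delete(if_generation_match=blob.generation)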
811
812 :type client: :class:`~google.cloud.storage.client.Client`
813 :param client:
814 (Optional) The client to use. If not passed, falls back to the
815 ``client`` stored on the blob's bucket.
816
817 :type if_generation_match: long
818 :param if_generation_match:
819 (Optional) See :ref:`using-if-generation-match`
820
821 :type if_generation_not_match: long
822 :param if_generation_not_match:
823 (Optional) See :ref:`using-if-generation-not-match`
824
825 :type if_metageneration_match: long
826 :param if_metageneration_match:
827 (Optional) See :ref:`using-if-metageneration-match`
828
829 :type if_metageneration_not_match: long
830 :param if_metageneration_not_match:
831 (Optional) See :ref:`using-if-metageneration-not-match`
832
833 :type timeout: float or tuple
834 :param timeout:
835 (Optional) The amount of time, in seconds, to wait
836 for the server response. See: :ref:`configuring_timeouts`
837
838 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
839 :param retry: (Optional) How to retry the RPC. A None value will disable
840 retries. A google.api_core.retry.Retry value will enable retries,
841 and the object will define retriable response codes and errors and
842 configure backoff and timeout options.
843
844 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
845 Retry object and activates it only if certain conditions are met.
846 This class exists to provide safe defaults for RPC calls that are
847 not technically safe to retry normally (due to potential data
848 duplication or other side-effects) but become safe to retry if a
849 condition such as if_generation_match is set.
850
851 See the retry.py source code and docstrings in this package
852 (google.cloud.storage.retry) for information on retry types and how
853 to configure them.
854
855 :raises: :class:`google.cloud.exceptions.NotFound`
856 (propagated from
857 :meth:`google.cloud.storage.bucket.Bucket.delete_blob`).
858 """
859 with create_trace_span(name="Storage.Blob.delete"):
860 self.bucket.delete_blob(
861 self.name,
862 client=client,
863 generation=self.generation,
864 timeout=timeout,
865 if_generation_match=if_generation_match,
866 if_generation_not_match=if_generation_not_match,
867 if_metageneration_match=if_metageneration_match,
868 if_metageneration_not_match=if_metageneration_not_match,
869 retry=retry,
870 )
871
872 def _get_transport(self, client):
873 """Return the client's transport.
874
875 :type client: :class:`~google.cloud.storage.client.Client`
876 :param client:
877 (Optional) The client to use. If not passed, falls back to the
878 ``client`` stored on the blob's bucket.
879
880 :rtype transport:
881 :class:`~google.auth.transport.requests.AuthorizedSession`
882 :returns: The transport (with credentials) that will
883 make authenticated requests.
884 """
885 client = self._require_client(client)
886 return client._http
887
888 def _get_download_url(
889 self,
890 client,
891 if_generation_match=None,
892 if_generation_not_match=None,
893 if_metageneration_match=None,
894 if_metageneration_not_match=None,
895 ):
896 """Get the download URL for the current blob.
897
898 If the ``media_link`` has been loaded, it will be used, otherwise
899 the URL will be constructed from the current blob's path (and possibly
900 generation) to avoid a round trip.
901
902 :type client: :class:`~google.cloud.storage.client.Client`
903 :param client: The client to use.
904
905 :type if_generation_match: long
906 :param if_generation_match:
907 (Optional) See :ref:`using-if-generation-match`
908
909 :type if_generation_not_match: long
910 :param if_generation_not_match:
911 (Optional) See :ref:`using-if-generation-not-match`
912
913 :type if_metageneration_match: long
914 :param if_metageneration_match:
915 (Optional) See :ref:`using-if-metageneration-match`
916
917 :type if_metageneration_not_match: long
918 :param if_metageneration_not_match:
919 (Optional) See :ref:`using-if-metageneration-not-match`
920
921 :rtype: str
922 :returns: The download URL for the current blob.
923 """
924 name_value_pairs = []
925 if self.media_link is None:
926 hostname = _get_host_name(client._connection)
927 base_url = _DOWNLOAD_URL_TEMPLATE.format(
928 hostname=hostname, path=self.path, api_version=_API_VERSION
929 )
930 if self.generation is not None:
931 name_value_pairs.append(("generation", f"{self.generation:d}"))
932 else:
933 base_url = self.media_link
934
935 if self.user_project is not None:
936 name_value_pairs.append(("userProject", self.user_project))
937
938 _add_generation_match_parameters(
939 name_value_pairs,
940 if_generation_match=if_generation_match,
941 if_generation_not_match=if_generation_not_match,
942 if_metageneration_match=if_metageneration_match,
943 if_metageneration_not_match=if_metageneration_not_match,
944 )
945 return _add_query_parameters(base_url, name_value_pairs)
946
947 def _extract_headers_from_download(self, response):
948 """Extract headers from a non-chunked request's http object.
949
950 This avoids the need to make a second request for commonly used
951 headers.
952
953 :type response:
954 :class requests.models.Response
955 :param response: The server response from downloading a non-chunked file
956 """
957 self._properties["contentEncoding"] = response.headers.get(
958 "Content-Encoding", None
959 )
960 self._properties[_CONTENT_TYPE_FIELD] = response.headers.get(
961 "Content-Type", None
962 )
963 self._properties["cacheControl"] = response.headers.get("Cache-Control", None)
964 self._properties["storageClass"] = response.headers.get(
965 "X-Goog-Storage-Class", None
966 )
967 self._properties["contentLanguage"] = response.headers.get(
968 "Content-Language", None
969 )
970 self._properties["etag"] = response.headers.get("ETag", None)
971 self._properties["generation"] = response.headers.get("X-goog-generation", None)
972 self._properties["metageneration"] = response.headers.get(
973 "X-goog-metageneration", None
974 )
975 # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
976 x_goog_hash = response.headers.get("X-Goog-Hash", "")
977
978 if x_goog_hash:
979 digests = {}
980 for encoded_digest in x_goog_hash.split(","):
981 match = re.match(r"(crc32c|md5)=([\w\d/\+/]+={0,3})", encoded_digest)
982 if match:
983 method, digest = match.groups()
984 digests[method] = digest
985
986 self._properties["crc32c"] = digests.get("crc32c", None)
987 self._properties["md5Hash"] = digests.get("md5", None)
988
989 def _do_download(
990 self,
991 transport,
992 file_obj,
993 download_url,
994 headers,
995 start=None,
996 end=None,
997 raw_download=False,
998 timeout=_DEFAULT_TIMEOUT,
999 checksum="auto",
1000 retry=DEFAULT_RETRY,
1001 single_shot_download=False,
1002 ):
1003 """Perform a download without any error handling.
1004
1005 This is intended to be called by :meth:`_prep_and_do_download` so it can
1006 be wrapped with error handling / remapping.
1007
1008 :type transport:
1009 :class:`~google.auth.transport.requests.AuthorizedSession`
1010 :param transport:
1011 The transport (with credentials) that will make authenticated
1012 requests.
1013
1014 :type file_obj: file
1015 :param file_obj: A file handle to which to write the blob's data.
1016
1017 :type download_url: str
1018 :param download_url: The URL where the media can be accessed.
1019
1020 :type headers: dict
1021 :param headers: Headers to be sent with the request(s).
1022
1023 :type start: int
1024 :param start: (Optional) The first byte in a range to be downloaded.
1025
1026 :type end: int
1027 :param end: (Optional) The last byte in a range to be downloaded.
1028
1029 :type raw_download: bool
1030 :param raw_download:
1031 (Optional) If true, download the object without any expansion.
1032
1033 :type timeout: float or tuple
1034 :param timeout:
1035 (Optional) The amount of time, in seconds, to wait
1036 for the server response. See: :ref:`configuring_timeouts`
1037
1038 :type checksum: str
1039 :param checksum:
1040 (Optional) The type of checksum to compute to verify the integrity
1041 of the object. The response headers must contain a checksum of the
1042 requested type. If the headers lack an appropriate checksum (for
1043 instance in the case of transcoded or ranged downloads where the
1044 remote service does not know the correct checksum, including
1045 downloads where chunk_size is set) an INFO-level log will be
1046 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1047 default is "auto", which will try to detect if the C extension for
1048 crc32c is installed and fall back to md5 otherwise.
1049
1050 :type retry: google.api_core.retry.Retry
1051 :param retry: (Optional) How to retry the RPC. A None value will disable
1052 retries. A google.api_core.retry.Retry value will enable retries,
1053 and the object will configure backoff and timeout options.
1054
1055 This private method does not accept ConditionalRetryPolicy values
1056 because the information necessary to evaluate the policy is instead
1057 evaluated in blob._prep_and_do_download().
1058
1059 See the retry.py source code and docstrings in this package
1060 (google.cloud.storage.retry) for information on retry types and how
1061 to configure them.
1062
        :type single_shot_download: bool
        :param single_shot_download:
            (Optional) If true, download the object in a single request.
            Caution: Enabling this will increase the memory overhead for your
            application, so enable it only if it suits your use case.
1068 """
1069
1070 extra_attributes = {
1071 "url.full": download_url,
1072 "download.chunk_size": f"{self.chunk_size}",
1073 "download.raw_download": raw_download,
1074 "upload.checksum": f"{checksum}",
1075 "download.single_shot_download": single_shot_download,
1076 }
1077 args = {"timeout": timeout}
1078
1079 if self.chunk_size is None:
1080 if raw_download:
1081 klass = RawDownload
1082 download_class = "RawDownload"
1083 else:
1084 klass = Download
1085 download_class = "Download"
1086
1087 download = klass(
1088 download_url,
1089 stream=file_obj,
1090 headers=headers,
1091 start=start,
1092 end=end,
1093 checksum=checksum,
1094 retry=retry,
1095 # NOTE: single_shot_download is only supported in Download and RawDownload
1096 # classes, i.e., when chunk_size is set to None (the default value). It is
1097 # not supported for chunked downloads.
1098 single_shot_download=single_shot_download,
1099 )
1100 with create_trace_span(
1101 name=f"Storage.{download_class}/consume",
1102 attributes=extra_attributes,
1103 api_request=args,
1104 ):
1105 response = download.consume(transport, timeout=timeout)
1106 self._extract_headers_from_download(response)
1107 else:
1108 if checksum:
1109 msg = _CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE.format(checksum)
1110 _logger.info(msg)
1111
1112 if raw_download:
1113 klass = RawChunkedDownload
1114 download_class = "RawChunkedDownload"
1115 else:
1116 klass = ChunkedDownload
1117 download_class = "ChunkedDownload"
1118
1119 download = klass(
1120 download_url,
1121 self.chunk_size,
1122 file_obj,
1123 headers=headers,
1124 start=start if start else 0,
1125 end=end,
1126 retry=retry,
1127 )
1128
1129 with create_trace_span(
1130 name=f"Storage.{download_class}/consumeNextChunk",
1131 attributes=extra_attributes,
1132 api_request=args,
1133 ):
1134 while not download.finished:
1135 download.consume_next_chunk(transport, timeout=timeout)
1136
1137 def download_to_file(
1138 self,
1139 file_obj,
1140 client=None,
1141 start=None,
1142 end=None,
1143 raw_download=False,
1144 if_etag_match=None,
1145 if_etag_not_match=None,
1146 if_generation_match=None,
1147 if_generation_not_match=None,
1148 if_metageneration_match=None,
1149 if_metageneration_not_match=None,
1150 timeout=_DEFAULT_TIMEOUT,
1151 checksum="auto",
1152 retry=DEFAULT_RETRY,
1153 single_shot_download=False,
1154 ):
1155 """Download the contents of this blob into a file-like object.
1156
1157 .. note::
1158
1159 If the server-set property, :attr:`media_link`, is not yet
1160 initialized, makes an additional API request to load it.
1161
        If the :attr:`chunk_size` of the blob is ``None``, the data is
        downloaded in a single request; otherwise, it is downloaded in chunks
        of :attr:`chunk_size` bytes per request.
1165
1166 For more fine-grained control over the download process, check out
1167 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
1168 For example, this library allows downloading **parts** of a blob rather than the whole thing.
1169
1170 If :attr:`user_project` is set on the bucket, bills the API request
1171 to that project.
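
        For example, a minimal sketch (bucket, object, and file names are
        placeholders):

        .. code-block:: python

            from google.cloud import storage

            client = storage.Client()
            blob = client.bucket("my-bucket").blob("my-object")
            with open("/tmp/my-object", "wb") as file_obj:
                blob.download_to_file(file_obj)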
1172
1173 :type file_obj: file
1174 :param file_obj: A file handle to which to write the blob's data.
1175
1176 :type client: :class:`~google.cloud.storage.client.Client`
1177 :param client:
1178 (Optional) The client to use. If not passed, falls back to the
1179 ``client`` stored on the blob's bucket.
1180
1181 :type start: int
1182 :param start: (Optional) The first byte in a range to be downloaded.
1183
1184 :type end: int
1185 :param end: (Optional) The last byte in a range to be downloaded.
1186
1187 :type raw_download: bool
1188 :param raw_download:
1189 (Optional) If true, download the object without any expansion.
1190
1191 :type if_etag_match: Union[str, Set[str]]
1192 :param if_etag_match:
1193 (Optional) See :ref:`using-if-etag-match`
1194
1195 :type if_etag_not_match: Union[str, Set[str]]
1196 :param if_etag_not_match:
1197 (Optional) See :ref:`using-if-etag-not-match`
1198
1199 :type if_generation_match: long
1200 :param if_generation_match:
1201 (Optional) See :ref:`using-if-generation-match`
1202
1203 :type if_generation_not_match: long
1204 :param if_generation_not_match:
1205 (Optional) See :ref:`using-if-generation-not-match`
1206
1207 :type if_metageneration_match: long
1208 :param if_metageneration_match:
1209 (Optional) See :ref:`using-if-metageneration-match`
1210
1211 :type if_metageneration_not_match: long
1212 :param if_metageneration_not_match:
1213 (Optional) See :ref:`using-if-metageneration-not-match`
1214
1215 :type timeout: float or tuple
1216 :param timeout:
1217 (Optional) The amount of time, in seconds, to wait
1218 for the server response. See: :ref:`configuring_timeouts`
1219
1220 :type checksum: str
1221 :param checksum:
1222 (Optional) The type of checksum to compute to verify the integrity
1223 of the object. The response headers must contain a checksum of the
1224 requested type. If the headers lack an appropriate checksum (for
1225 instance in the case of transcoded or ranged downloads where the
1226 remote service does not know the correct checksum, including
1227 downloads where chunk_size is set) an INFO-level log will be
1228 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1229 default is "auto", which will try to detect if the C extension for
1230 crc32c is installed and fall back to md5 otherwise.
1231
1232 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1233 :param retry: (Optional) How to retry the RPC. A None value will disable
1234 retries. A google.api_core.retry.Retry value will enable retries,
1235 and the object will define retriable response codes and errors and
1236 configure backoff and timeout options.
1237
1238 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1239 Retry object and activates it only if certain conditions are met.
1240 This class exists to provide safe defaults for RPC calls that are
1241 not technically safe to retry normally (due to potential data
1242 duplication or other side-effects) but become safe to retry if a
1243 condition such as if_metageneration_match is set.
1244
1245 See the retry.py source code and docstrings in this package
1246 (google.cloud.storage.retry) for information on retry types and how
1247 to configure them.
1248
        :type single_shot_download: bool
        :param single_shot_download:
            (Optional) If true, download the object in a single request.
            Caution: Enabling this will increase the memory overhead for your
            application, so enable it only if it suits your use case.
1254
1255 :raises: :class:`google.cloud.exceptions.NotFound`
1256 """
1257 with create_trace_span(name="Storage.Blob.downloadToFile"):
1258 self._prep_and_do_download(
1259 file_obj,
1260 client=client,
1261 start=start,
1262 end=end,
1263 raw_download=raw_download,
1264 if_etag_match=if_etag_match,
1265 if_etag_not_match=if_etag_not_match,
1266 if_generation_match=if_generation_match,
1267 if_generation_not_match=if_generation_not_match,
1268 if_metageneration_match=if_metageneration_match,
1269 if_metageneration_not_match=if_metageneration_not_match,
1270 timeout=timeout,
1271 checksum=checksum,
1272 retry=retry,
1273 single_shot_download=single_shot_download,
1274 )
1275
1276 def _handle_filename_and_download(self, filename, *args, **kwargs):
1277 """Download the contents of this blob into a named file.
1278
1279 :type filename: str
1280 :param filename: A filename to be passed to ``open``.
1281
1282 For *args and **kwargs, refer to the documentation for download_to_filename() for more information.
1283 """
1284
1285 try:
1286 with open(filename, "wb") as file_obj:
1287 self._prep_and_do_download(
1288 file_obj,
1289 *args,
1290 **kwargs,
1291 )
1292
1293 except (DataCorruption, NotFound):
1294 # Delete the corrupt or empty downloaded file.
1295 os.remove(filename)
1296 raise
1297
1298 updated = self.updated
1299 if updated is not None:
1300 mtime = updated.timestamp()
1301 os.utime(file_obj.name, (mtime, mtime))
1302
1303 def download_to_filename(
1304 self,
1305 filename,
1306 client=None,
1307 start=None,
1308 end=None,
1309 raw_download=False,
1310 if_etag_match=None,
1311 if_etag_not_match=None,
1312 if_generation_match=None,
1313 if_generation_not_match=None,
1314 if_metageneration_match=None,
1315 if_metageneration_not_match=None,
1316 timeout=_DEFAULT_TIMEOUT,
1317 checksum="auto",
1318 retry=DEFAULT_RETRY,
1319 single_shot_download=False,
1320 ):
1321 """Download the contents of this blob into a named file.
1322
1323 If :attr:`user_project` is set on the bucket, bills the API request
1324 to that project.
1325
1326 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-download-encrypted-file#storage_download_encrypted_file-python)
1327 to download a file with a [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).
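
        For example, a minimal sketch (bucket, object, and file names are
        placeholders):

        .. code-block:: python

            from google.cloud import storage

            client = storage.Client()
            blob = client.bucket("my-bucket").blob("my-object")
            blob.download_to_filename("/tmp/my-object")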
1328
1329 :type filename: str
1330 :param filename: A filename to be passed to ``open``.
1331
1332 :type client: :class:`~google.cloud.storage.client.Client`
1333 :param client:
1334 (Optional) The client to use. If not passed, falls back to the
1335 ``client`` stored on the blob's bucket.
1336
1337 :type start: int
1338 :param start: (Optional) The first byte in a range to be downloaded.
1339
1340 :type end: int
1341 :param end: (Optional) The last byte in a range to be downloaded.
1342
1343 :type raw_download: bool
1344 :param raw_download:
1345 (Optional) If true, download the object without any expansion.
1346
1347 :type if_etag_match: Union[str, Set[str]]
1348 :param if_etag_match:
1349 (Optional) See :ref:`using-if-etag-match`
1350
1351 :type if_etag_not_match: Union[str, Set[str]]
1352 :param if_etag_not_match:
1353 (Optional) See :ref:`using-if-etag-not-match`
1354
1355 :type if_generation_match: long
1356 :param if_generation_match:
1357 (Optional) See :ref:`using-if-generation-match`
1358
1359 :type if_generation_not_match: long
1360 :param if_generation_not_match:
1361 (Optional) See :ref:`using-if-generation-not-match`
1362
1363 :type if_metageneration_match: long
1364 :param if_metageneration_match:
1365 (Optional) See :ref:`using-if-metageneration-match`
1366
1367 :type if_metageneration_not_match: long
1368 :param if_metageneration_not_match:
1369 (Optional) See :ref:`using-if-metageneration-not-match`
1370
1371 :type timeout: float or tuple
1372 :param timeout:
1373 (Optional) The amount of time, in seconds, to wait
1374 for the server response. See: :ref:`configuring_timeouts`
1375
1376 :type checksum: str
1377 :param checksum:
1378 (Optional) The type of checksum to compute to verify the integrity
1379 of the object. The response headers must contain a checksum of the
1380 requested type. If the headers lack an appropriate checksum (for
1381 instance in the case of transcoded or ranged downloads where the
1382 remote service does not know the correct checksum, including
1383 downloads where chunk_size is set) an INFO-level log will be
1384 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1385 default is "auto", which will try to detect if the C extension for
1386 crc32c is installed and fall back to md5 otherwise.
1387
1388 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1389 :param retry: (Optional) How to retry the RPC. A None value will disable
1390 retries. A google.api_core.retry.Retry value will enable retries,
1391 and the object will define retriable response codes and errors and
1392 configure backoff and timeout options.
1393
1394 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1395 Retry object and activates it only if certain conditions are met.
1396 This class exists to provide safe defaults for RPC calls that are
1397 not technically safe to retry normally (due to potential data
1398 duplication or other side-effects) but become safe to retry if a
1399 condition such as if_metageneration_match is set.
1400
1401 See the retry.py source code and docstrings in this package
1402 (google.cloud.storage.retry) for information on retry types and how
1403 to configure them.
1404
        :type single_shot_download: bool
        :param single_shot_download:
            (Optional) If true, download the object in a single request.
            Caution: Enabling this will increase the memory overhead for your
            application, so enable it only if it suits your use case.
1410
1411 :raises: :class:`google.cloud.exceptions.NotFound`
1412 """
1413 with create_trace_span(name="Storage.Blob.downloadToFilename"):
1414 self._handle_filename_and_download(
1415 filename,
1416 client=client,
1417 start=start,
1418 end=end,
1419 raw_download=raw_download,
1420 if_etag_match=if_etag_match,
1421 if_etag_not_match=if_etag_not_match,
1422 if_generation_match=if_generation_match,
1423 if_generation_not_match=if_generation_not_match,
1424 if_metageneration_match=if_metageneration_match,
1425 if_metageneration_not_match=if_metageneration_not_match,
1426 timeout=timeout,
1427 checksum=checksum,
1428 retry=retry,
1429 single_shot_download=single_shot_download,
1430 )
1431
1432 def download_as_bytes(
1433 self,
1434 client=None,
1435 start=None,
1436 end=None,
1437 raw_download=False,
1438 if_etag_match=None,
1439 if_etag_not_match=None,
1440 if_generation_match=None,
1441 if_generation_not_match=None,
1442 if_metageneration_match=None,
1443 if_metageneration_not_match=None,
1444 timeout=_DEFAULT_TIMEOUT,
1445 checksum="auto",
1446 retry=DEFAULT_RETRY,
1447 single_shot_download=False,
1448 ):
1449 """Download the contents of this blob as a bytes object.
1450
1451 If :attr:`user_project` is set on the bucket, bills the API request
1452 to that project.
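
        For example, a minimal sketch (bucket and object names are placeholders):

        .. code-block:: python

            from google.cloud import storage

            client = storage.Client()
            data = client.bucket("my-bucket").blob("my-object").download_as_bytes()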
1453
1454 :type client: :class:`~google.cloud.storage.client.Client`
1455 :param client:
1456 (Optional) The client to use. If not passed, falls back to the
1457 ``client`` stored on the blob's bucket.
1458
1459 :type start: int
1460 :param start: (Optional) The first byte in a range to be downloaded.
1461
1462 :type end: int
1463 :param end: (Optional) The last byte in a range to be downloaded.
1464
1465 :type raw_download: bool
1466 :param raw_download:
1467 (Optional) If true, download the object without any expansion.
1468
1469 :type if_etag_match: Union[str, Set[str]]
1470 :param if_etag_match:
1471 (Optional) See :ref:`using-if-etag-match`
1472
1473 :type if_etag_not_match: Union[str, Set[str]]
1474 :param if_etag_not_match:
1475 (Optional) See :ref:`using-if-etag-not-match`
1476
1477 :type if_generation_match: long
1478 :param if_generation_match:
1479 (Optional) See :ref:`using-if-generation-match`
1480
1481 :type if_generation_not_match: long
1482 :param if_generation_not_match:
1483 (Optional) See :ref:`using-if-generation-not-match`
1484
1485 :type if_metageneration_match: long
1486 :param if_metageneration_match:
1487 (Optional) See :ref:`using-if-metageneration-match`
1488
1489 :type if_metageneration_not_match: long
1490 :param if_metageneration_not_match:
1491 (Optional) See :ref:`using-if-metageneration-not-match`
1492
1493 :type timeout: float or tuple
1494 :param timeout:
1495 (Optional) The amount of time, in seconds, to wait
1496 for the server response. See: :ref:`configuring_timeouts`
1497
1498 :type checksum: str
1499 :param checksum:
1500 (Optional) The type of checksum to compute to verify the integrity
1501 of the object. The response headers must contain a checksum of the
1502 requested type. If the headers lack an appropriate checksum (for
1503 instance in the case of transcoded or ranged downloads where the
1504 remote service does not know the correct checksum, including
1505 downloads where chunk_size is set) an INFO-level log will be
1506 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1507 default is "auto", which will try to detect if the C extension for
1508 crc32c is installed and fall back to md5 otherwise.
1509
1510 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1511 :param retry: (Optional) How to retry the RPC. A None value will disable
1512 retries. A google.api_core.retry.Retry value will enable retries,
1513 and the object will define retriable response codes and errors and
1514 configure backoff and timeout options.
1515
1516 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1517 Retry object and activates it only if certain conditions are met.
1518 This class exists to provide safe defaults for RPC calls that are
1519 not technically safe to retry normally (due to potential data
1520 duplication or other side-effects) but become safe to retry if a
1521 condition such as if_metageneration_match is set.
1522
1523 See the retry.py source code and docstrings in this package
1524 (google.cloud.storage.retry) for information on retry types and how
1525 to configure them.
1526
        :type single_shot_download: bool
        :param single_shot_download:
            (Optional) If true, download the object in a single request.
            Caution: Enabling this will increase the memory overhead for your
            application, so enable it only if it suits your use case.
1532
1533 :rtype: bytes
1534 :returns: The data stored in this blob.
1535
1536 :raises: :class:`google.cloud.exceptions.NotFound`
1537 """
1538 with create_trace_span(name="Storage.Blob.downloadAsBytes"):
1539 string_buffer = BytesIO()
1540
1541 self._prep_and_do_download(
1542 string_buffer,
1543 client=client,
1544 start=start,
1545 end=end,
1546 raw_download=raw_download,
1547 if_etag_match=if_etag_match,
1548 if_etag_not_match=if_etag_not_match,
1549 if_generation_match=if_generation_match,
1550 if_generation_not_match=if_generation_not_match,
1551 if_metageneration_match=if_metageneration_match,
1552 if_metageneration_not_match=if_metageneration_not_match,
1553 timeout=timeout,
1554 checksum=checksum,
1555 retry=retry,
1556 single_shot_download=single_shot_download,
1557 )
1558 return string_buffer.getvalue()
1559
1560 def download_as_string(
1561 self,
1562 client=None,
1563 start=None,
1564 end=None,
1565 raw_download=False,
1566 if_etag_match=None,
1567 if_etag_not_match=None,
1568 if_generation_match=None,
1569 if_generation_not_match=None,
1570 if_metageneration_match=None,
1571 if_metageneration_not_match=None,
1572 timeout=_DEFAULT_TIMEOUT,
1573 retry=DEFAULT_RETRY,
1574 single_shot_download=False,
1575 ):
1576 """(Deprecated) Download the contents of this blob as a bytes object.
1577
1578 If :attr:`user_project` is set on the bucket, bills the API request
1579 to that project.
1580
1581 .. note::
1582 Deprecated alias for :meth:`download_as_bytes`.
1583
1584 :type client: :class:`~google.cloud.storage.client.Client`
1585 :param client:
1586 (Optional) The client to use. If not passed, falls back to the
1587 ``client`` stored on the blob's bucket.
1588
1589 :type start: int
1590 :param start: (Optional) The first byte in a range to be downloaded.
1591
1592 :type end: int
1593 :param end: (Optional) The last byte in a range to be downloaded.
1594
1595 :type raw_download: bool
1596 :param raw_download:
1597 (Optional) If true, download the object without any expansion.
1598
1599 :type if_etag_match: Union[str, Set[str]]
1600 :param if_etag_match:
1601 (Optional) See :ref:`using-if-etag-match`
1602
1603 :type if_etag_not_match: Union[str, Set[str]]
1604 :param if_etag_not_match:
1605 (Optional) See :ref:`using-if-etag-not-match`
1606
1607 :type if_generation_match: long
1608 :param if_generation_match:
1609 (Optional) See :ref:`using-if-generation-match`
1610
1611 :type if_generation_not_match: long
1612 :param if_generation_not_match:
1613 (Optional) See :ref:`using-if-generation-not-match`
1614
1615 :type if_metageneration_match: long
1616 :param if_metageneration_match:
1617 (Optional) See :ref:`using-if-metageneration-match`
1618
1619 :type if_metageneration_not_match: long
1620 :param if_metageneration_not_match:
1621 (Optional) See :ref:`using-if-metageneration-not-match`
1622
1623 :type timeout: float or tuple
1624 :param timeout:
1625 (Optional) The amount of time, in seconds, to wait
1626 for the server response. See: :ref:`configuring_timeouts`
1627
1628 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1629 :param retry: (Optional) How to retry the RPC. A None value will disable
1630 retries. A google.api_core.retry.Retry value will enable retries,
1631 and the object will define retriable response codes and errors and
1632 configure backoff and timeout options.
1633
1634 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1635 Retry object and activates it only if certain conditions are met.
1636 This class exists to provide safe defaults for RPC calls that are
1637 not technically safe to retry normally (due to potential data
1638 duplication or other side-effects) but become safe to retry if a
1639 condition such as if_metageneration_match is set.
1640
1641 See the retry.py source code and docstrings in this package
1642 (google.cloud.storage.retry) for information on retry types and how
1643 to configure them.
1644
1645 :type single_shot_download: bool
1646 :param single_shot_download:
1647 (Optional) If true, download the object in a single request.
1648 Caution: Enabling this can significantly increase memory usage for
1649 your application. Enable it only if it suits your use case.
1650
1651 :rtype: bytes
1652 :returns: The data stored in this blob.
1653
1654 :raises: :class:`google.cloud.exceptions.NotFound`
1655 """
1656 warnings.warn(
1657 _DOWNLOAD_AS_STRING_DEPRECATED,
1658 PendingDeprecationWarning,
1659 stacklevel=2,
1660 )
1661 with create_trace_span(name="Storage.Blob.downloadAsString"):
1662 return self.download_as_bytes(
1663 client=client,
1664 start=start,
1665 end=end,
1666 raw_download=raw_download,
1667 if_etag_match=if_etag_match,
1668 if_etag_not_match=if_etag_not_match,
1669 if_generation_match=if_generation_match,
1670 if_generation_not_match=if_generation_not_match,
1671 if_metageneration_match=if_metageneration_match,
1672 if_metageneration_not_match=if_metageneration_not_match,
1673 timeout=timeout,
1674 retry=retry,
1675 single_shot_download=single_shot_download,
1676 )
1677
1678 def download_as_text(
1679 self,
1680 client=None,
1681 start=None,
1682 end=None,
1683 raw_download=False,
1684 encoding=None,
1685 if_etag_match=None,
1686 if_etag_not_match=None,
1687 if_generation_match=None,
1688 if_generation_not_match=None,
1689 if_metageneration_match=None,
1690 if_metageneration_not_match=None,
1691 timeout=_DEFAULT_TIMEOUT,
1692 retry=DEFAULT_RETRY,
1693 single_shot_download=False,
1694 ):
1695 """Download the contents of this blob as text (*not* bytes).
1696
1697 If :attr:`user_project` is set on the bucket, bills the API request
1698 to that project.
1699
1700 :type client: :class:`~google.cloud.storage.client.Client`
1701 :param client:
1702 (Optional) The client to use. If not passed, falls back to the
1703 ``client`` stored on the blob's bucket.
1704
1705 :type start: int
1706 :param start: (Optional) The first byte in a range to be downloaded.
1707
1708 :type end: int
1709 :param end: (Optional) The last byte in a range to be downloaded.
1710
1711 :type raw_download: bool
1712 :param raw_download:
1713 (Optional) If true, download the object without any expansion.
1714
1715 :type encoding: str
1716 :param encoding: (Optional) encoding to be used to decode the
1717 downloaded bytes. Defaults to the ``charset`` param of
1718 :attr:`content_type`, or else to "utf-8".
1719
1720 :type if_etag_match: Union[str, Set[str]]
1721 :param if_etag_match:
1722 (Optional) See :ref:`using-if-etag-match`
1723
1724 :type if_etag_not_match: Union[str, Set[str]]
1725 :param if_etag_not_match:
1726 (Optional) See :ref:`using-if-etag-not-match`
1727
1728 :type if_generation_match: long
1729 :param if_generation_match:
1730 (Optional) See :ref:`using-if-generation-match`
1731
1732 :type if_generation_not_match: long
1733 :param if_generation_not_match:
1734 (Optional) See :ref:`using-if-generation-not-match`
1735
1736 :type if_metageneration_match: long
1737 :param if_metageneration_match:
1738 (Optional) See :ref:`using-if-metageneration-match`
1739
1740 :type if_metageneration_not_match: long
1741 :param if_metageneration_not_match:
1742 (Optional) See :ref:`using-if-metageneration-not-match`
1743
1744 :type timeout: float or tuple
1745 :param timeout:
1746 (Optional) The amount of time, in seconds, to wait
1747 for the server response. See: :ref:`configuring_timeouts`
1748
1749 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1750 :param retry: (Optional) How to retry the RPC. A None value will disable
1751 retries. A google.api_core.retry.Retry value will enable retries,
1752 and the object will define retriable response codes and errors and
1753 configure backoff and timeout options.
1754
1755 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1756 Retry object and activates it only if certain conditions are met.
1757 This class exists to provide safe defaults for RPC calls that are
1758 not technically safe to retry normally (due to potential data
1759 duplication or other side-effects) but become safe to retry if a
1760 condition such as if_metageneration_match is set.
1761
1762 See the retry.py source code and docstrings in this package
1763 (google.cloud.storage.retry) for information on retry types and how
1764 to configure them.
1765
1766 :type single_shot_download: bool
1767 :param single_shot_download:
1768 (Optional) If true, download the object in a single request.
1769 Caution: Enabling this can significantly increase memory usage for
1770 your application. Enable it only if it suits your use case.
1771
1772 :rtype: text
1773 :returns: The data stored in this blob, decoded to text.
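
Example:
    A minimal sketch of how the encoding is chosen (``client`` is assumed
    to be an existing :class:`~google.cloud.storage.client.Client`; names
    are illustrative)::

        blob = client.bucket("my-bucket").blob("notes.txt")
        text = blob.download_as_text()  # charset from content type, else UTF-8
        text = blob.download_as_text(encoding="latin-1")  # explicit encoding wins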
1774 """
1775 with create_trace_span(name="Storage.Blob.downloadAsText"):
1776 data = self.download_as_bytes(
1777 client=client,
1778 start=start,
1779 end=end,
1780 raw_download=raw_download,
1781 if_etag_match=if_etag_match,
1782 if_etag_not_match=if_etag_not_match,
1783 if_generation_match=if_generation_match,
1784 if_generation_not_match=if_generation_not_match,
1785 if_metageneration_match=if_metageneration_match,
1786 if_metageneration_not_match=if_metageneration_not_match,
1787 timeout=timeout,
1788 retry=retry,
1789 single_shot_download=single_shot_download,
1790 )
1791
1792 if encoding is not None:
1793 return data.decode(encoding)
1794
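            # No explicit encoding was given: fall back to a ``charset``
            # parameter on the blob's content type (e.g.
            # "text/html; charset=ISO-8859-1"), and finally to UTF-8.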
1795 if self.content_type is not None:
1796 msg = HeaderParser().parsestr("Content-Type: " + self.content_type)
1797 params = dict(msg.get_params()[1:])
1798 if "charset" in params:
1799 return data.decode(params["charset"])
1800
1801 return data.decode("utf-8")
1802
1803 def _get_content_type(self, content_type, filename=None):
1804 """Determine the content type from the current object.
1805
1806 The return value will be determined in order of precedence:
1807
1808 - The value passed in to this method (if not :data:`None`)
1809 - The value stored on the current blob
1810 - The default value ('application/octet-stream')
1811
1812 :type content_type: str
1813 :param content_type: (Optional) Type of content.
1814
1815 :type filename: str
1816 :param filename:
1817 (Optional) The name of the file where the content is stored.
1818
1819 :rtype: str
1820 :returns: Type of content gathered from the object.
1821 """
1822 if content_type is None:
1823 content_type = self.content_type
1824
1825 if content_type is None and filename is not None:
1826 content_type, _ = mimetypes.guess_type(filename)
1827
1828 if content_type is None:
1829 content_type = _DEFAULT_CONTENT_TYPE
1830
1831 return content_type
1832
1833 def _get_writable_metadata(self):
1834 """Get the object / blob metadata which is writable.
1835
1836 This is intended to be used when creating a new object / blob.
1837
1838 See the [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects)
1839 for more information. The fields marked as writable are:
1840
1841 * ``acl``
1842 * ``cacheControl``
1843 * ``contentDisposition``
1844 * ``contentEncoding``
1845 * ``contentLanguage``
1846 * ``contentType``
1847 * ``crc32c``
1848 * ``customTime``
1849 * ``md5Hash``
1850 * ``metadata``
1851 * ``name``
1852 * ``retention``
1853 * ``storageClass``
1854
1855 For now, we don't support ``acl``; access control lists should be
1856 managed directly through :class:`ObjectACL` methods.
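
For example, a blob named ``my-object`` with a pending change to
``cacheControl`` would yield (illustrative)::

    {"name": "my-object", "cacheControl": "no-cache"}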
1857 """
1858 # NOTE: This assumes `self.name` is unicode.
1859 object_metadata = {"name": self.name}
1860 for key in self._changes:
1861 if key in _WRITABLE_FIELDS:
1862 object_metadata[key] = self._properties[key]
1863
1864 return object_metadata
1865
1866 def _get_upload_arguments(self, client, content_type, filename=None, command=None):
1867 """Get required arguments for performing an upload.
1868
1869 The content type returned will be determined in order of precedence:
1870
1871 - The value passed in to this method (if not :data:`None`)
1872 - The value stored on the current blob
1873 - The default value ('application/octet-stream')
1874
1875 :type content_type: str
1876 :param content_type: Type of content being uploaded (or :data:`None`).
1877
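:type filename: str
:param filename:
    (Optional) A file name; used only to guess a content type via
    :func:`mimetypes.guess_type` when neither ``content_type`` nor the
    blob's stored content type is set.
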
1878 :type command: str
1879 :param command:
1880 (Optional) Information about which interface for upload was used,
1881 to be included in the X-Goog-API-Client header. Please leave as None
1882 unless otherwise directed.
1883
1884 :rtype: tuple
1885 :returns: A triple of
1886
1887 * A header dictionary
1888 * An object metadata dictionary
1889 * The ``content_type`` as a string (according to precedence)
1890 """
1891 content_type = self._get_content_type(content_type, filename=filename)
1892 # Add any client attached custom headers to the upload headers.
1893 headers = {
1894 **_get_default_headers(
1895 client._connection.user_agent, content_type, command=command
1896 ),
1897 **_get_encryption_headers(self._encryption_key),
1898 **client._extra_headers,
1899 }
1900 object_metadata = self._get_writable_metadata()
1901 return headers, object_metadata, content_type
1902
1903 def _do_multipart_upload(
1904 self,
1905 client,
1906 stream,
1907 content_type,
1908 size,
1909 predefined_acl,
1910 if_generation_match,
1911 if_generation_not_match,
1912 if_metageneration_match,
1913 if_metageneration_not_match,
1914 timeout=_DEFAULT_TIMEOUT,
1915 checksum="auto",
1916 retry=None,
1917 command=None,
1918 ):
1919 """Perform a multipart upload.
1920
1921 The content type of the upload will be determined in order
1922 of precedence:
1923
1924 - The value passed in to this method (if not :data:`None`)
1925 - The value stored on the current blob
1926 - The default value ('application/octet-stream')
1927
1928 :type client: :class:`~google.cloud.storage.client.Client`
1929 :param client:
1930 (Optional) The client to use. If not passed, falls back to the
1931 ``client`` stored on the blob's bucket.
1932
1933 :type stream: IO[bytes]
1934 :param stream: A bytes IO object open for reading.
1935
1936 :type content_type: str
1937 :param content_type: Type of content being uploaded (or :data:`None`).
1938
1939 :type size: int
1940 :param size:
1941 The number of bytes to be uploaded (which will be read from
1942 ``stream``). If not provided, the upload will be concluded once
1943 ``stream`` is exhausted (or :data:`None`).
1944
1945 :type predefined_acl: str
1946 :param predefined_acl: (Optional) Predefined access control list
1947
1948 :type if_generation_match: long
1949 :param if_generation_match:
1950 (Optional) See :ref:`using-if-generation-match`
1951
1952 :type if_generation_not_match: long
1953 :param if_generation_not_match:
1954 (Optional) See :ref:`using-if-generation-not-match`
1955
1956 :type if_metageneration_match: long
1957 :param if_metageneration_match:
1958 (Optional) See :ref:`using-if-metageneration-match`
1959
1960 :type if_metageneration_not_match: long
1961 :param if_metageneration_not_match:
1962 (Optional) See :ref:`using-if-metageneration-not-match`
1963
1964 :type timeout: float or tuple
1965 :param timeout:
1966 (Optional) The amount of time, in seconds, to wait
1967 for the server response. See: :ref:`configuring_timeouts`
1968
1969 :type checksum: str
1970 :param checksum:
1971 (Optional) The type of checksum to compute to verify
1972 the integrity of the object. The request metadata will be amended
1973 to include the computed value. Using this option will override a
1974 manually-set checksum value. Supported values are "md5", "crc32c",
1975 "auto" and None. The default is "auto", which will try to detect if
1976 the C extension for crc32c is installed and fall back to md5
1977 otherwise.
1978 :type retry: google.api_core.retry.Retry
1979 :param retry: (Optional) How to retry the RPC. A None value will disable
1980 retries. A google.api_core.retry.Retry value will enable retries,
1981 and the object will configure backoff and timeout options.
1982
1983 This private method does not accept ConditionalRetryPolicy values
1984 because the information necessary to evaluate the policy is instead
1985 evaluated in blob._do_upload().
1986
1987 See the retry.py source code and docstrings in this package
1988 (google.cloud.storage.retry) for information on retry types and how
1989 to configure them.
1990
1991 :type command: str
1992 :param command:
1993 (Optional) Information about which interface for upload was used,
1994 to be included in the X-Goog-API-Client header. Please leave as None
1995 unless otherwise directed.
1996
1997 :rtype: :class:`~requests.Response`
1998 :returns: The "200 OK" response object returned after the multipart
1999 upload request.
2000 :raises: :exc:`ValueError` if ``size`` is not :data:`None` but the
2001 ``stream`` has fewer than ``size`` bytes remaining.
2002 """
2003 if size is None:
2004 data = stream.read()
2005 else:
2006 data = stream.read(size)
2007 if len(data) < size:
2008 msg = _READ_LESS_THAN_SIZE.format(size, len(data))
2009 raise ValueError(msg)
2010
2011 client = self._require_client(client)
2012 transport = self._get_transport(client)
2013 if "metadata" in self._properties and "metadata" not in self._changes:
2014 self._changes.add("metadata")
2015
2016 info = self._get_upload_arguments(client, content_type, command=command)
2017 headers, object_metadata, content_type = info
2018
2019 if "crc32c" in self._properties:
2020 object_metadata["crc32c"] = self._properties["crc32c"]
2021
2022 hostname = _get_host_name(client._connection)
2023 base_url = _MULTIPART_URL_TEMPLATE.format(
2024 hostname=hostname,
2025 bucket_path=self.bucket.path,
2026 api_version=_API_VERSION,
2027 )
2028 name_value_pairs = []
2029
2030 if self.user_project is not None:
2031 name_value_pairs.append(("userProject", self.user_project))
2032
2033 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
2034 # at rest, object resource metadata will store the version of the Key Management
2035 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
2036 # used to upload a new version of the object then the existing kmsKeyName version
2037 # value can't be used in the upload request and the client instead ignores it.
2038 if (
2039 self.kms_key_name is not None
2040 and "cryptoKeyVersions" not in self.kms_key_name
2041 ):
2042 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
2043
2044 if predefined_acl is not None:
2045 name_value_pairs.append(("predefinedAcl", predefined_acl))
2046
2047 if if_generation_match is not None:
2048 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
2049
2050 if if_generation_not_match is not None:
2051 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
2052
2053 if if_metageneration_match is not None:
2054 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
2055
2056 if if_metageneration_not_match is not None:
2057 name_value_pairs.append(
2058 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
2059 )
2060
2061 upload_url = _add_query_parameters(base_url, name_value_pairs)
2062 upload = MultipartUpload(
2063 upload_url, headers=headers, checksum=checksum, retry=retry
2064 )
2065
2066 extra_attributes = {
2067 "url.full": upload_url,
2068 "upload.checksum": f"{checksum}",
2069 }
2070 args = {"timeout": timeout}
2071 with create_trace_span(
2072 name="Storage.MultipartUpload/transmit",
2073 attributes=extra_attributes,
2074 client=client,
2075 api_request=args,
2076 ):
2077 response = upload.transmit(
2078 transport, data, object_metadata, content_type, timeout=timeout
2079 )
2080
2081 return response
2082
2083 def _initiate_resumable_upload(
2084 self,
2085 client,
2086 stream,
2087 content_type,
2088 size,
2089 predefined_acl=None,
2090 extra_headers=None,
2091 chunk_size=None,
2092 if_generation_match=None,
2093 if_generation_not_match=None,
2094 if_metageneration_match=None,
2095 if_metageneration_not_match=None,
2096 timeout=_DEFAULT_TIMEOUT,
2097 checksum="auto",
2098 retry=None,
2099 command=None,
2100 ):
2101 """Initiate a resumable upload.
2102
2103 The content type of the upload will be determined in order
2104 of precedence:
2105
2106 - The value passed in to this method (if not :data:`None`)
2107 - The value stored on the current blob
2108 - The default value ('application/octet-stream')
2109
2110 :type client: :class:`~google.cloud.storage.client.Client`
2111 :param client:
2112 (Optional) The client to use. If not passed, falls back to the
2113 ``client`` stored on the blob's bucket.
2114
2115 :type stream: IO[bytes]
2116 :param stream: A bytes IO object open for reading.
2117
2118 :type content_type: str
2119 :param content_type: Type of content being uploaded (or :data:`None`).
2120
2121 :type size: int
2122 :param size:
2123 The number of bytes to be uploaded (which will be read from
2124 ``stream``). If not provided, the upload will be concluded once
2125 ``stream`` is exhausted (or :data:`None`).
2126
2127 :type predefined_acl: str
2128 :param predefined_acl: (Optional) Predefined access control list
2129
2130 :type extra_headers: dict
2131 :param extra_headers:
2132 (Optional) Extra headers to add to standard headers.
2133
2134 :type chunk_size: int
2135 :param chunk_size:
2136 (Optional) Chunk size to use when creating a
2137 :class:`~google.cloud.storage._media.requests.ResumableUpload`.
2138 If not passed, falls back to the chunk size on the
2139 current blob; if the chunk size on the current blob is also
2140 :data:`None`, the default value is used.
2141 The default value of ``chunk_size`` is 100 MB.
2142
2143 :type if_generation_match: long
2144 :param if_generation_match:
2145 (Optional) See :ref:`using-if-generation-match`
2146
2147 :type if_generation_not_match: long
2148 :param if_generation_not_match:
2149 (Optional) See :ref:`using-if-generation-not-match`
2150
2151 :type if_metageneration_match: long
2152 :param if_metageneration_match:
2153 (Optional) See :ref:`using-if-metageneration-match`
2154
2155 :type if_metageneration_not_match: long
2156 :param if_metageneration_not_match:
2157 (Optional) See :ref:`using-if-metageneration-not-match`
2158
2159 :type timeout: float or tuple
2160 :param timeout:
2161 (Optional) The amount of time, in seconds, to wait
2162 for the server response. See: :ref:`configuring_timeouts`
2163
2164 :type checksum: str
2165 :param checksum:
2166 (Optional) The type of checksum to compute to verify
2167 the integrity of the object. After the upload is complete, the
2168 server-computed checksum of the resulting object will be checked
2169 and google.cloud.storage.exceptions.DataCorruption will be raised on
2170 a mismatch. On a validation failure, the client will attempt to
2171 delete the uploaded object automatically. Supported values are
2172 "md5", "crc32c", "auto" and None. The default is "auto", which will
2173 try to detect if the C extension for crc32c is installed and fall
2174 back to md5 otherwise.
2175
2176 :type retry: google.api_core.retry.Retry
2177 :param retry: (Optional) How to retry the RPC. A None value will disable
2178 retries. A google.api_core.retry.Retry value will enable retries,
2179 and the object will configure backoff and timeout options.
2180
2181 This private method does not accept ConditionalRetryPolicy values
2182 because the information necessary to evaluate the policy is instead
2183 evaluated in blob._do_upload().
2184
2185 See the retry.py source code and docstrings in this package
2186 (google.cloud.storage.retry) for information on retry types and how
2187 to configure them.
2188
2189 :type command: str
2190 :param command:
2191 (Optional) Information about which interface for upload was used,
2192 to be included in the X-Goog-API-Client header. Please leave as None
2193 unless otherwise directed.
2194
2195 :rtype: tuple
2196 :returns:
2197 Pair of
2198
2199 * The :class:`~google.cloud.storage._media.requests.ResumableUpload`
2200 that was created
2201 * The ``transport`` used to initiate the upload.
2202 """
2203 client = self._require_client(client)
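        # Resolve the chunk size: explicit argument first, then the blob's
        # configured chunk_size, then the library default (100 MB).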
2204 if chunk_size is None:
2205 chunk_size = self.chunk_size
2206 if chunk_size is None:
2207 chunk_size = _DEFAULT_CHUNKSIZE
2208
2209 transport = self._get_transport(client)
2210 if "metadata" in self._properties and "metadata" not in self._changes:
2211 self._changes.add("metadata")
2212 info = self._get_upload_arguments(client, content_type, command=command)
2213 headers, object_metadata, content_type = info
2214 if extra_headers is not None:
2215 headers.update(extra_headers)
2216
2217 if "crc32c" in self._properties:
2218 object_metadata["crc32c"] = self._properties["crc32c"]
2219
2220 hostname = _get_host_name(client._connection)
2221 base_url = _RESUMABLE_URL_TEMPLATE.format(
2222 hostname=hostname,
2223 bucket_path=self.bucket.path,
2224 api_version=_API_VERSION,
2225 )
2226 name_value_pairs = []
2227
2228 if self.user_project is not None:
2229 name_value_pairs.append(("userProject", self.user_project))
2230
2231 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
2232 # at rest, object resource metadata will store the version of the Key Management
2233 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
2234 # used to upload a new version of the object then the existing kmsKeyName version
2235 # value can't be used in the upload request and the client instead ignores it.
2236 if (
2237 self.kms_key_name is not None
2238 and "cryptoKeyVersions" not in self.kms_key_name
2239 ):
2240 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
2241
2242 if predefined_acl is not None:
2243 name_value_pairs.append(("predefinedAcl", predefined_acl))
2244
2245 if if_generation_match is not None:
2246 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
2247
2248 if if_generation_not_match is not None:
2249 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
2250
2251 if if_metageneration_match is not None:
2252 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
2253
2254 if if_metageneration_not_match is not None:
2255 name_value_pairs.append(
2256 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
2257 )
2258
2259 upload_url = _add_query_parameters(base_url, name_value_pairs)
2260 upload = ResumableUpload(
2261 upload_url,
2262 chunk_size,
2263 headers=headers,
2264 checksum=checksum,
2265 retry=retry,
2266 )
2267
2268 upload.initiate(
2269 transport,
2270 stream,
2271 object_metadata,
2272 content_type,
2273 total_bytes=size,
2274 stream_final=False,
2275 timeout=timeout,
2276 )
2277
2278 return upload, transport
2279
2280 def _do_resumable_upload(
2281 self,
2282 client,
2283 stream,
2284 content_type,
2285 size,
2286 predefined_acl,
2287 if_generation_match,
2288 if_generation_not_match,
2289 if_metageneration_match,
2290 if_metageneration_not_match,
2291 timeout=_DEFAULT_TIMEOUT,
2292 checksum="auto",
2293 retry=None,
2294 command=None,
2295 ):
2296 """Perform a resumable upload.
2297
2298 Assumes ``chunk_size`` is not :data:`None` on the current blob.
2299 The default value of ``chunk_size`` is 100 MB.
2300
2301 The content type of the upload will be determined in order
2302 of precedence:
2303
2304 - The value passed in to this method (if not :data:`None`)
2305 - The value stored on the current blob
2306 - The default value ('application/octet-stream')
2307
2308 :type client: :class:`~google.cloud.storage.client.Client`
2309 :param client:
2310 (Optional) The client to use. If not passed, falls back to the
2311 ``client`` stored on the blob's bucket.
2312
2313 :type stream: IO[bytes]
2314 :param stream: A bytes IO object open for reading.
2315
2316 :type content_type: str
2317 :param content_type: Type of content being uploaded (or :data:`None`).
2318
2319 :type size: int
2320 :param size:
2321 The number of bytes to be uploaded (which will be read from
2322 ``stream``). If not provided, the upload will be concluded once
2323 ``stream`` is exhausted (or :data:`None`).
2324
2325 :type predefined_acl: str
2326 :param predefined_acl: (Optional) Predefined access control list
2327
2328 :type if_generation_match: long
2329 :param if_generation_match:
2330 (Optional) See :ref:`using-if-generation-match`
2331
2332 :type if_generation_not_match: long
2333 :param if_generation_not_match:
2334 (Optional) See :ref:`using-if-generation-not-match`
2335
2336 :type if_metageneration_match: long
2337 :param if_metageneration_match:
2338 (Optional) See :ref:`using-if-metageneration-match`
2339
2340 :type if_metageneration_not_match: long
2341 :param if_metageneration_not_match:
2342 (Optional) See :ref:`using-if-metageneration-not-match`
2343
2344 :type timeout: float or tuple
2345 :param timeout:
2346 (Optional) The amount of time, in seconds, to wait
2347 for the server response. See: :ref:`configuring_timeouts`
2348
2349 :type checksum: str
2350 :param checksum:
2351 (Optional) The type of checksum to compute to verify
2352 the integrity of the object. After the upload is complete, the
2353 server-computed checksum of the resulting object will be checked
2354 and google.cloud.storage.exceptions.DataCorruption will be raised on
2355 a mismatch. On a validation failure, the client will attempt to
2356 delete the uploaded object automatically. Supported values are
2357 "md5", "crc32c", "auto" and None. The default is "auto", which will
2358 try to detect if the C extension for crc32c is installed and fall
2359 back to md5 otherwise.
2360
2361 :type retry: google.api_core.retry.Retry
2362 :param retry: (Optional) How to retry the RPC. A None value will disable
2363 retries. A google.api_core.retry.Retry value will enable retries,
2364 and the object will configure backoff and timeout options.
2365
2366 This private method does not accept ConditionalRetryPolicy values
2367 because the information necessary to evaluate the policy is instead
2368 evaluated in blob._do_upload().
2369
2370 See the retry.py source code and docstrings in this package
2371 (google.cloud.storage.retry) for information on retry types and how
2372 to configure them.
2373
2374 :type command: str
2375 :param command:
2376 (Optional) Information about which interface for upload was used,
2377 to be included in the X-Goog-API-Client header. Please leave as None
2378 unless otherwise directed.
2379
2380 :rtype: :class:`~requests.Response`
2381 :returns: The "200 OK" response object returned after the final chunk
2382 is uploaded.
2383 """
2384 upload, transport = self._initiate_resumable_upload(
2385 client,
2386 stream,
2387 content_type,
2388 size,
2389 predefined_acl=predefined_acl,
2390 if_generation_match=if_generation_match,
2391 if_generation_not_match=if_generation_not_match,
2392 if_metageneration_match=if_metageneration_match,
2393 if_metageneration_not_match=if_metageneration_not_match,
2394 timeout=timeout,
2395 checksum=checksum,
2396 retry=retry,
2397 command=command,
2398 )
2399 extra_attributes = {
2400 "url.full": upload.resumable_url,
2401 "upload.chunk_size": upload.chunk_size,
2402 "upload.checksum": f"{checksum}",
2403 }
2404 args = {"timeout": timeout}
2405 with create_trace_span(
2406 name="Storage.ResumableUpload/transmitNextChunk",
2407 attributes=extra_attributes,
2408 client=client,
2409 api_request=args,
2410 ):
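            # Send the stream one chunk at a time; the response from the
            # final chunk carries the object resource returned by the server.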
2411 while not upload.finished:
2412 try:
2413 response = upload.transmit_next_chunk(transport, timeout=timeout)
2414 except DataCorruption:
2415 # Attempt to delete the corrupted object.
2416 self.delete()
2417 raise
2418 return response
2419
2420 def _do_upload(
2421 self,
2422 client,
2423 stream,
2424 content_type,
2425 size,
2426 predefined_acl,
2427 if_generation_match,
2428 if_generation_not_match,
2429 if_metageneration_match,
2430 if_metageneration_not_match,
2431 timeout=_DEFAULT_TIMEOUT,
2432 checksum="auto",
2433 retry=None,
2434 command=None,
2435 ):
2436 """Determine an upload strategy and then perform the upload.
2437
2438 If the size of the data to be uploaded exceeds 8 MB, a resumable media
2439 request will be used; otherwise, the content and the metadata will be
2440 uploaded in a single multipart upload request.
2441
2442 The content type of the upload will be determined in order
2443 of precedence:
2444
2445 - The value passed in to this method (if not :data:`None`)
2446 - The value stored on the current blob
2447 - The default value ('application/octet-stream')
2448
2449 :type client: :class:`~google.cloud.storage.client.Client`
2450 :param client:
2451 (Optional) The client to use. If not passed, falls back to the
2452 ``client`` stored on the blob's bucket.
2453
2454 :type stream: IO[bytes]
2455 :param stream: A bytes IO object open for reading.
2456
2457 :type content_type: str
2458 :param content_type: Type of content being uploaded (or :data:`None`).
2459
2460 :type size: int
2461 :param size:
2462 The number of bytes to be uploaded (which will be read from
2463 ``stream``). If not provided, the upload will be concluded once
2464 ``stream`` is exhausted (or :data:`None`).
2465
2466 :type predefined_acl: str
2467 :param predefined_acl: (Optional) Predefined access control list
2468
2469 :type if_generation_match: long
2470 :param if_generation_match:
2471 (Optional) See :ref:`using-if-generation-match`
2472
2473 :type if_generation_not_match: long
2474 :param if_generation_not_match:
2475 (Optional) See :ref:`using-if-generation-not-match`
2476
2477 :type if_metageneration_match: long
2478 :param if_metageneration_match:
2479 (Optional) See :ref:`using-if-metageneration-match`
2480
2481 :type if_metageneration_not_match: long
2482 :param if_metageneration_not_match:
2483 (Optional) See :ref:`using-if-metageneration-not-match`
2484
2485 :type timeout: float or tuple
2486 :param timeout:
2487 (Optional) The amount of time, in seconds, to wait
2488 for the server response. See: :ref:`configuring_timeouts`
2489
2490 :type checksum: str
2491 :param checksum:
2492 (Optional) The type of checksum to compute to verify
2493 the integrity of the object. If the upload is completed in a single
2494 request, the checksum will be entirely precomputed and the remote
2495 server will handle verification and error handling. If the upload
2496 is too large and must be transmitted in multiple requests, the
2497 checksum will be incrementally computed and the client will handle
2498 verification and error handling, raising
2499 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2500 attempting to delete the corrupted file. Supported values are
2501 "md5", "crc32c", "auto" and None. The default is "auto", which will
2502 try to detect if the C extension for crc32c is installed and fall
2503 back to md5 otherwise.
2504
2505 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2506 :param retry: (Optional) How to retry the RPC. A None value will disable
2507 retries. A google.api_core.retry.Retry value will enable retries,
2508 and the object will define retriable response codes and errors and
2509 configure backoff and timeout options.
2510
2511 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2512 Retry object and activates it only if certain conditions are met.
2513 This class exists to provide safe defaults for RPC calls that are
2514 not technically safe to retry normally (due to potential data
2515 duplication or other side-effects) but become safe to retry if a
2516 condition such as if_generation_match is set.
2517
2518 See the retry.py source code and docstrings in this package
2519 (google.cloud.storage.retry) for information on retry types and how
2520 to configure them.
2521
2522 :type command: str
2523 :param command:
2524 (Optional) Information about which interface for upload was used,
2525 to be included in the X-Goog-API-Client header. Please leave as None
2526 unless otherwise directed.
2527
2528 :rtype: dict
2529 :returns: The parsed JSON from the "200 OK" response. This will be the
2530 **only** response in the multipart case and it will be the
2531 **final** response in the resumable case.
2532 """
2533
2534 # Handle ConditionalRetryPolicy.
2535 if isinstance(retry, ConditionalRetryPolicy):
2536 # Conditional retries are designed for non-media calls, which change
2537 # arguments into query_params dictionaries. Media operations work
2538 # differently, so here we make a "fake" query_params to feed to the
2539 # ConditionalRetryPolicy.
2540 query_params = {
2541 "ifGenerationMatch": if_generation_match,
2542 "ifMetagenerationMatch": if_metageneration_match,
2543 }
2544 retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
2545
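        # Known sizes of at most _MAX_MULTIPART_SIZE (8 MB) use a single
        # multipart request; larger or unknown sizes use a chunked
        # resumable upload.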
2546 if size is not None and size <= _MAX_MULTIPART_SIZE:
2547 response = self._do_multipart_upload(
2548 client,
2549 stream,
2550 content_type,
2551 size,
2552 predefined_acl,
2553 if_generation_match,
2554 if_generation_not_match,
2555 if_metageneration_match,
2556 if_metageneration_not_match,
2557 timeout=timeout,
2558 checksum=checksum,
2559 retry=retry,
2560 command=command,
2561 )
2562 else:
2563 response = self._do_resumable_upload(
2564 client,
2565 stream,
2566 content_type,
2567 size,
2568 predefined_acl,
2569 if_generation_match,
2570 if_generation_not_match,
2571 if_metageneration_match,
2572 if_metageneration_not_match,
2573 timeout=timeout,
2574 checksum=checksum,
2575 retry=retry,
2576 command=command,
2577 )
2578
2579 return response.json()
2580
2581 def _prep_and_do_upload(
2582 self,
2583 file_obj,
2584 rewind=False,
2585 size=None,
2586 content_type=None,
2587 client=None,
2588 predefined_acl=None,
2589 if_generation_match=None,
2590 if_generation_not_match=None,
2591 if_metageneration_match=None,
2592 if_metageneration_not_match=None,
2593 timeout=_DEFAULT_TIMEOUT,
2594 checksum="auto",
2595 retry=DEFAULT_RETRY,
2596 command=None,
2597 ):
2598 """Upload the contents of this blob from a file-like object.
2599
2600 The content type of the upload will be determined in order
2601 of precedence:
2602
2603 - The value passed in to this method (if not :data:`None`)
2604 - The value stored on the current blob
2605 - The default value ('application/octet-stream')
2606
2607 .. note::
2608 The effect of uploading to an existing blob depends on the
2609 "versioning" and "lifecycle" policies defined on the blob's
2610 bucket. In the absence of those policies, upload will
2611 overwrite any existing contents.
2612
2613 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2614 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2615 API documents for details.
2616
2617 If the size of the data to be uploaded exceeds 8 MB, a resumable media
2618 request will be used; otherwise, the content and the metadata will be
2619 uploaded in a single multipart upload request.
2620
2621 For more fine-grained control over the upload process, check out
2622 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
2623
2624 If :attr:`user_project` is set on the bucket, bills the API request
2625 to that project.
2626
2627 :type file_obj: file
2628 :param file_obj: A file handle opened in binary mode for reading.
2629
2630 :type rewind: bool
2631 :param rewind:
2632 If True, seek to the beginning of the file handle before writing
2633 the file to Cloud Storage.
2634
2635 :type size: int
2636 :param size:
2637 The number of bytes to be uploaded (which will be read from
2638 ``file_obj``). If not provided, the upload will be concluded once
2639 ``file_obj`` is exhausted.
2640
2641 :type content_type: str
2642 :param content_type: (Optional) Type of content being uploaded.
2643
2644 :type client: :class:`~google.cloud.storage.client.Client`
2645 :param client:
2646 (Optional) The client to use. If not passed, falls back to the
2647 ``client`` stored on the blob's bucket.
2648
2649 :type predefined_acl: str
2650 :param predefined_acl: (Optional) Predefined access control list
2651
2652 :type if_generation_match: long
2653 :param if_generation_match:
2654 (Optional) See :ref:`using-if-generation-match`
2655
2656 :type if_generation_not_match: long
2657 :param if_generation_not_match:
2658 (Optional) See :ref:`using-if-generation-not-match`
2659
2660 :type if_metageneration_match: long
2661 :param if_metageneration_match:
2662 (Optional) See :ref:`using-if-metageneration-match`
2663
2664 :type if_metageneration_not_match: long
2665 :param if_metageneration_not_match:
2666 (Optional) See :ref:`using-if-metageneration-not-match`
2667
2668 :type timeout: float or tuple
2669 :param timeout:
2670 (Optional) The amount of time, in seconds, to wait
2671 for the server response. See: :ref:`configuring_timeouts`
2672
2673 :type checksum: str
2674 :param checksum:
2675 (Optional) The type of checksum to compute to verify
2676 the integrity of the object. If the upload is completed in a single
2677 request, the checksum will be entirely precomputed and the remote
2678 server will handle verification and error handling. If the upload
2679 is too large and must be transmitted in multiple requests, the
2680 checksum will be incrementally computed and the client will handle
2681 verification and error handling, raising
2682 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2683 attempting to delete the corrupted file. Supported values are
2684 "md5", "crc32c", "auto" and None. The default is "auto", which will
2685 try to detect if the C extension for crc32c is installed and fall
2686 back to md5 otherwise.
2687
2688 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2689 :param retry: (Optional) How to retry the RPC. A None value will disable
2690 retries. A google.api_core.retry.Retry value will enable retries,
2691 and the object will define retriable response codes and errors and
2692 configure backoff and timeout options.
2693
2694 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2695 Retry object and activates it only if certain conditions are met.
2696 This class exists to provide safe defaults for RPC calls that are
2697 not technically safe to retry normally (due to potential data
2698 duplication or other side-effects) but become safe to retry if a
2699 condition such as if_generation_match is set.
2700
2701 See the retry.py source code and docstrings in this package
2702 (google.cloud.storage.retry) for information on retry types and how
2703 to configure them.
2704
2705 :type command: str
2706 :param command:
2707 (Optional) Information about which interface for upload was used,
2708 to be included in the X-Goog-API-Client header. Please leave as None
2709 unless otherwise directed.
2710
2711 :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
2712 if the upload response returns an error status.
2713 """
2714 _maybe_rewind(file_obj, rewind=rewind)
2715 predefined_acl = ACL.validate_predefined(predefined_acl)
2716
2717 try:
2718 created_json = self._do_upload(
2719 client,
2720 file_obj,
2721 content_type,
2722 size,
2723 predefined_acl,
2724 if_generation_match,
2725 if_generation_not_match,
2726 if_metageneration_match,
2727 if_metageneration_not_match,
2728 timeout=timeout,
2729 checksum=checksum,
2730 retry=retry,
2731 command=command,
2732 )
2733 self._set_properties(created_json)
2734 except InvalidResponse as exc:
2735 _raise_from_invalid_response(exc)
2736
2737 def upload_from_file(
2738 self,
2739 file_obj,
2740 rewind=False,
2741 size=None,
2742 content_type=None,
2743 client=None,
2744 predefined_acl=None,
2745 if_generation_match=None,
2746 if_generation_not_match=None,
2747 if_metageneration_match=None,
2748 if_metageneration_not_match=None,
2749 timeout=_DEFAULT_TIMEOUT,
2750 checksum="auto",
2751 retry=DEFAULT_RETRY,
2752 ):
2753 """Upload the contents of this blob from a file-like object.
2754
2755 The content type of the upload will be determined in order
2756 of precedence:
2757
2758 - The value passed in to this method (if not :data:`None`)
2759 - The value stored on the current blob
2760 - The default value ('application/octet-stream')
2761
2762 .. note::
2763 The effect of uploading to an existing blob depends on the
2764 "versioning" and "lifecycle" policies defined on the blob's
2765 bucket. In the absence of those policies, upload will
2766 overwrite any existing contents.
2767
2768 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2769 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2770 API documents for details.
2771
2772 If the size of the data to be uploaded exceeds 8 MB, a resumable media
2773 request will be used; otherwise, the content and the metadata will be
2774 uploaded in a single multipart upload request.
2775
2776 For more fine-grained control over the upload process, check out
2777 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
2778
2779 If :attr:`user_project` is set on the bucket, bills the API request
2780 to that project.
2781
2782 :type file_obj: file
2783 :param file_obj: A file handle opened in binary mode for reading.
2784
2785 :type rewind: bool
2786 :param rewind:
2787 If True, seek to the beginning of the file handle before writing
2788 the file to Cloud Storage.
2789
2790 :type size: int
2791 :param size:
2792 The number of bytes to be uploaded (which will be read from
2793 ``file_obj``). If not provided, the upload will be concluded once
2794 ``file_obj`` is exhausted.
2795
2796 :type content_type: str
2797 :param content_type: (Optional) Type of content being uploaded.
2798
2799 :type client: :class:`~google.cloud.storage.client.Client`
2800 :param client:
2801 (Optional) The client to use. If not passed, falls back to the
2802 ``client`` stored on the blob's bucket.
2803
2804 :type predefined_acl: str
2805 :param predefined_acl: (Optional) Predefined access control list
2806
2807 :type if_generation_match: long
2808 :param if_generation_match:
2809 (Optional) See :ref:`using-if-generation-match`
2810
2811 :type if_generation_not_match: long
2812 :param if_generation_not_match:
2813 (Optional) See :ref:`using-if-generation-not-match`
2814
2815 :type if_metageneration_match: long
2816 :param if_metageneration_match:
2817 (Optional) See :ref:`using-if-metageneration-match`
2818
2819 :type if_metageneration_not_match: long
2820 :param if_metageneration_not_match:
2821 (Optional) See :ref:`using-if-metageneration-not-match`
2822
2823 :type timeout: float or tuple
2824 :param timeout:
2825 (Optional) The amount of time, in seconds, to wait
2826 for the server response. See: :ref:`configuring_timeouts`
2827
2828 :type checksum: str
2829 :param checksum:
2830 (Optional) The type of checksum to compute to verify
2831 the integrity of the object. If the upload is completed in a single
2832 request, the checksum will be entirely precomputed and the remote
2833 server will handle verification and error handling. If the upload
2834 is too large and must be transmitted in multiple requests, the
2835 checksum will be incrementally computed and the client will handle
2836 verification and error handling, raising
2837 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2838 attempting to delete the corrupted file. Supported values are
2839 "md5", "crc32c", "auto" and None. The default is "auto", which will
2840 try to detect if the C extension for crc32c is installed and fall
2841 back to md5 otherwise.
2842
2843 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2844 :param retry: (Optional) How to retry the RPC. A None value will disable
2845 retries. A google.api_core.retry.Retry value will enable retries,
2846 and the object will define retriable response codes and errors and
2847 configure backoff and timeout options.
2848
2849 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2850 Retry object and activates it only if certain conditions are met.
2851 This class exists to provide safe defaults for RPC calls that are
2852 not technically safe to retry normally (due to potential data
2853 duplication or other side-effects) but become safe to retry if a
2854 condition such as if_generation_match is set.
2855
2856 See the retry.py source code and docstrings in this package
2857 (google.cloud.storage.retry) for information on retry types and how
2858 to configure them.
2859
2860 :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
2861 if the upload response returns an error status.
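
Example:
    A minimal usage sketch (the file path and bucket name are
    illustrative)::

        from google.cloud import storage

        client = storage.Client()
        blob = client.bucket("my-bucket").blob("my-object")
        with open("/path/to/local-file", "rb") as file_obj:
            blob.upload_from_file(file_obj, if_generation_match=0)

    Passing ``if_generation_match=0`` makes the upload succeed only if the
    object does not yet exist, which also makes the request safe to retry.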
2862 """
2863 with create_trace_span(name="Storage.Blob.uploadFromFile"):
2864 self._prep_and_do_upload(
2865 file_obj,
2866 rewind=rewind,
2867 size=size,
2868 content_type=content_type,
2869 client=client,
2870 predefined_acl=predefined_acl,
2871 if_generation_match=if_generation_match,
2872 if_generation_not_match=if_generation_not_match,
2873 if_metageneration_match=if_metageneration_match,
2874 if_metageneration_not_match=if_metageneration_not_match,
2875 timeout=timeout,
2876 checksum=checksum,
2877 retry=retry,
2878 )
2879
2880 def _handle_filename_and_upload(self, filename, content_type=None, *args, **kwargs):
2881 """Upload this blob's contents from the content of a named file.
2882
2883 :type filename: str
2884 :param filename: The path to the file.
2885
2886 :type content_type: str
2887 :param content_type: (Optional) Type of content being uploaded.
2888
2889 For ``*args`` and ``**kwargs``, refer to the documentation for :meth:`upload_from_filename` for more information.
2890 """
2891
2892 content_type = self._get_content_type(content_type, filename=filename)
2893
2894 with open(filename, "rb") as file_obj:
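            # Determine the size up front via fstat so that small files can
            # take the single-request multipart path in _do_upload.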
2895 total_bytes = os.fstat(file_obj.fileno()).st_size
2896 self._prep_and_do_upload(
2897 file_obj,
2898 content_type=content_type,
2899 size=total_bytes,
2900 *args,
2901 **kwargs,
2902 )
2903
2904 def upload_from_filename(
2905 self,
2906 filename,
2907 content_type=None,
2908 client=None,
2909 predefined_acl=None,
2910 if_generation_match=None,
2911 if_generation_not_match=None,
2912 if_metageneration_match=None,
2913 if_metageneration_not_match=None,
2914 timeout=_DEFAULT_TIMEOUT,
2915 checksum="auto",
2916 retry=DEFAULT_RETRY,
2917 ):
2918 """Upload this blob's contents from the content of a named file.
2919
2920 The content type of the upload will be determined in order
2921 of precedence:
2922
2923 - The value passed in to this method (if not :data:`None`)
2924 - The value stored on the current blob
2925 - The value given by ``mimetypes.guess_type``
2926 - The default value ('application/octet-stream')
2927
2928 .. note::
2929 The effect of uploading to an existing blob depends on the
2930 "versioning" and "lifecycle" policies defined on the blob's
2931 bucket. In the absence of those policies, upload will
2932 overwrite any existing contents.
2933
2934 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2935 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2936 API documents for details.
2937
2938 If :attr:`user_project` is set on the bucket, bills the API request
2939 to that project.
2940
2941 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-upload-encrypted-file#storage_upload_encrypted_file-python)
2942 to upload a file with a
2943 [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).
2944
2945 :type filename: str
2946 :param filename: The path to the file.
2947
2948 :type content_type: str
2949 :param content_type: (Optional) Type of content being uploaded.
2950
2951 :type client: :class:`~google.cloud.storage.client.Client`
2952 :param client:
2953 (Optional) The client to use. If not passed, falls back to the
2954 ``client`` stored on the blob's bucket.
2955
2956 :type predefined_acl: str
2957 :param predefined_acl: (Optional) Predefined access control list
2958
2959 :type if_generation_match: long
2960 :param if_generation_match:
2961 (Optional) See :ref:`using-if-generation-match`
2962
2963 :type if_generation_not_match: long
2964 :param if_generation_not_match:
2965 (Optional) See :ref:`using-if-generation-not-match`
2966
2967 :type if_metageneration_match: long
2968 :param if_metageneration_match:
2969 (Optional) See :ref:`using-if-metageneration-match`
2970
2971 :type if_metageneration_not_match: long
2972 :param if_metageneration_not_match:
2973 (Optional) See :ref:`using-if-metageneration-not-match`
2974
2975 :type timeout: float or tuple
2976 :param timeout:
2977 (Optional) The amount of time, in seconds, to wait
2978 for the server response. See: :ref:`configuring_timeouts`
2979
2980 :type checksum: str
2981 :param checksum:
2982 (Optional) The type of checksum to compute to verify
2983 the integrity of the object. If the upload is completed in a single
2984 request, the checksum will be entirely precomputed and the remote
2985 server will handle verification and error handling. If the upload
2986 is too large and must be transmitted in multiple requests, the
2987 checksum will be incrementally computed and the client will handle
2988 verification and error handling, raising
2989 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2990 attempting to delete the corrupted file. Supported values are
2991 "md5", "crc32c", "auto" and None. The default is "auto", which will
2992 try to detect if the C extension for crc32c is installed and fall
2993 back to md5 otherwise.
2994
2995 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2996 :param retry: (Optional) How to retry the RPC. A None value will disable
2997 retries. A google.api_core.retry.Retry value will enable retries,
2998 and the object will define retriable response codes and errors and
2999 configure backoff and timeout options.
3000
3001 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3002 Retry object and activates it only if certain conditions are met.
3003 This class exists to provide safe defaults for RPC calls that are
3004 not technically safe to retry normally (due to potential data
3005 duplication or other side-effects) but become safe to retry if a
3006 condition such as if_generation_match is set.
3007
3008 See the retry.py source code and docstrings in this package
3009 (google.cloud.storage.retry) for information on retry types and how
3010 to configure them.
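
Example:
    A minimal usage sketch (the paths and bucket name are illustrative;
    ``client`` is an existing :class:`~google.cloud.storage.client.Client`)::

        blob = client.bucket("my-bucket").blob("report.pdf")
        blob.upload_from_filename("/tmp/report.pdf")

    Here the content type is guessed from the file extension
    (``application/pdf``) because neither ``content_type`` nor the blob's
    stored content type is set.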
3011 """
3012 with create_trace_span(name="Storage.Blob.uploadFromFilename"):
3013 self._handle_filename_and_upload(
3014 filename,
3015 content_type=content_type,
3016 client=client,
3017 predefined_acl=predefined_acl,
3018 if_generation_match=if_generation_match,
3019 if_generation_not_match=if_generation_not_match,
3020 if_metageneration_match=if_metageneration_match,
3021 if_metageneration_not_match=if_metageneration_not_match,
3022 timeout=timeout,
3023 checksum=checksum,
3024 retry=retry,
3025 )
3026
3027 def upload_from_string(
3028 self,
3029 data,
3030 content_type="text/plain",
3031 client=None,
3032 predefined_acl=None,
3033 if_generation_match=None,
3034 if_generation_not_match=None,
3035 if_metageneration_match=None,
3036 if_metageneration_not_match=None,
3037 timeout=_DEFAULT_TIMEOUT,
3038 checksum="auto",
3039 retry=DEFAULT_RETRY,
3040 ):
3041 """Upload contents of this blob from the provided string.
3042
3043 .. note::
3044 The effect of uploading to an existing blob depends on the
3045 "versioning" and "lifecycle" policies defined on the blob's
3046 bucket. In the absence of those policies, upload will
3047 overwrite any existing contents.
3048
3049 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3050 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3051 API documents for details.
3052
3053 If :attr:`user_project` is set on the bucket, bills the API request
3054 to that project.
3055
3056 :type data: bytes or str
3057 :param data:
3058 The data to store in this blob. If the value is text, it will be
3059 encoded as UTF-8.
3060
3061 :type content_type: str
3062 :param content_type:
3063 (Optional) Type of content being uploaded. Defaults to
3064 ``'text/plain'``.
3065
3066 :type client: :class:`~google.cloud.storage.client.Client`
3067 :param client:
3068 (Optional) The client to use. If not passed, falls back to the
3069 ``client`` stored on the blob's bucket.
3070
3071 :type predefined_acl: str
3072 :param predefined_acl: (Optional) Predefined access control list
3073
3074 :type if_generation_match: long
3075 :param if_generation_match:
3076 (Optional) See :ref:`using-if-generation-match`
3077
3078 :type if_generation_not_match: long
3079 :param if_generation_not_match:
3080 (Optional) See :ref:`using-if-generation-not-match`
3081
3082 :type if_metageneration_match: long
3083 :param if_metageneration_match:
3084 (Optional) See :ref:`using-if-metageneration-match`
3085
3086 :type if_metageneration_not_match: long
3087 :param if_metageneration_not_match:
3088 (Optional) See :ref:`using-if-metageneration-not-match`
3089
3090 :type timeout: float or tuple
3091 :param timeout:
3092 (Optional) The amount of time, in seconds, to wait
3093 for the server response. See: :ref:`configuring_timeouts`
3094
3095 :type checksum: str
3096 :param checksum:
3097 (Optional) The type of checksum to compute to verify
3098 the integrity of the object. If the upload is completed in a single
3099 request, the checksum will be entirely precomputed and the remote
3100 server will handle verification and error handling. If the upload
3101 is too large and must be transmitted in multiple requests, the
3102 checksum will be incrementally computed and the client will handle
3103 verification and error handling, raising
3104 google.cloud.storage.exceptions.DataCorruption on a mismatch and
3105 attempting to delete the corrupted file. Supported values are
3106 "md5", "crc32c", "auto" and None. The default is "auto", which will
3107 try to detect if the C extension for crc32c is installed and fall
3108 back to md5 otherwise.
3109
3110 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3111 :param retry: (Optional) How to retry the RPC. A None value will disable
3112 retries. A google.api_core.retry.Retry value will enable retries,
3113 and the object will define retriable response codes and errors and
3114 configure backoff and timeout options.
3115
3116 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3117 Retry object and activates it only if certain conditions are met.
3118 This class exists to provide safe defaults for RPC calls that are
3119 not technically safe to retry normally (due to potential data
3120 duplication or other side-effects) but become safe to retry if a
3121 condition such as if_generation_match is set.
3122
3123 See the retry.py source code and docstrings in this package
3124 (google.cloud.storage.retry) for information on retry types and how
3125 to configure them.
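
Example:
    A minimal usage sketch (the bucket and object names are illustrative)::

        blob = client.bucket("my-bucket").blob("data.json")
        blob.upload_from_string(
            '{"message": "hello"}', content_type="application/json"
        )

    Text values are encoded as UTF-8 before upload; ``bytes`` values are
    sent unchanged.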
3126 """
3127 with create_trace_span(name="Storage.Blob.uploadFromString"):
3128 data = _to_bytes(data, encoding="utf-8")
3129 string_buffer = BytesIO(data)
3130 self.upload_from_file(
3131 file_obj=string_buffer,
3132 size=len(data),
3133 content_type=content_type,
3134 client=client,
3135 predefined_acl=predefined_acl,
3136 if_generation_match=if_generation_match,
3137 if_generation_not_match=if_generation_not_match,
3138 if_metageneration_match=if_metageneration_match,
3139 if_metageneration_not_match=if_metageneration_not_match,
3140 timeout=timeout,
3141 checksum=checksum,
3142 retry=retry,
3143 )
3144
3145 def create_resumable_upload_session(
3146 self,
3147 content_type=None,
3148 size=None,
3149 origin=None,
3150 client=None,
3151 timeout=_DEFAULT_TIMEOUT,
3152 checksum="auto",
3153 predefined_acl=None,
3154 if_generation_match=None,
3155 if_generation_not_match=None,
3156 if_metageneration_match=None,
3157 if_metageneration_not_match=None,
3158 retry=DEFAULT_RETRY,
3159 ):
3160 """Create a resumable upload session.
3161
3162 Resumable upload sessions allow you to start an upload session from
3163 one client and complete the session in another. This method is called
3164 by the initiator to set the metadata and limits. The initiator then
3165 passes the session URL to the client that will upload the binary data.
3166 The client performs a PUT request on the session URL to complete the
3167 upload. This process allows untrusted clients to upload to an
3168 access-controlled bucket.
3169
3170 For more details, see the
3171 documentation on [`signed URLs`](https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable).
3172
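Example (an illustrative sketch; ``bucket`` is an existing bucket, and
``requests`` stands in for whatever HTTP client the uploading party
uses)::

    blob = bucket.blob("uploads/archive.zip")
    session_url = blob.create_resumable_upload_session(
        content_type="application/zip"
    )
    # Hand ``session_url`` to the (possibly untrusted) uploader, which
    # completes the upload with a PUT of the object's bytes, e.g.:
    #
    #     requests.put(session_url, data=payload_bytes)
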
3173 The content type of the upload will be determined in order
3174 of precedence:
3175
3176 - The value passed in to this method (if not :data:`None`)
3177 - The value stored on the current blob
3178 - The default value ('application/octet-stream')
3179
3180 .. note::
3181 The effect of uploading to an existing blob depends on the
3182 "versioning" and "lifecycle" policies defined on the blob's
3183 bucket. In the absence of those policies, upload will
3184 overwrite any existing contents.
3185
3186 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3187 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3188 API documents for details.
3189
3190 If :attr:`encryption_key` is set, the blob will be encrypted with
3191 a [`customer-supplied`](https://cloud.google.com/storage/docs/encryption#customer-supplied)
3192 encryption key.
3193
3194 If :attr:`user_project` is set on the bucket, bills the API request
3195 to that project.
3196
3197 :type size: int
3198 :param size:
3199 (Optional) The maximum number of bytes that can be uploaded using
3200 this session. If the size is not known when creating the session,
3201 this should be left blank.
3202
3203 :type content_type: str
3204 :param content_type: (Optional) Type of content being uploaded.
3205
3206 :type origin: str
3207 :param origin:
3208 (Optional) If set, the upload can only be completed by a user-agent
3209 that uploads from the given origin. This can be useful when passing
3210 the session to a web client.
3211
3212 :type client: :class:`~google.cloud.storage.client.Client`
3213 :param client:
3214 (Optional) The client to use. If not passed, falls back to the
3215 ``client`` stored on the blob's bucket.
3216
3217 :type timeout: float or tuple
3218 :param timeout:
3219 (Optional) The amount of time, in seconds, to wait
3220 for the server response. See: :ref:`configuring_timeouts`
3221
3222 :type checksum: str
3223 :param checksum:
3224 (Optional) The type of checksum to compute to verify
3225 the integrity of the object. After the upload is complete, the
3226 server-computed checksum of the resulting object will be checked
3227 and google.cloud.storage.exceptions.DataCorruption will be raised on
3228 a mismatch. On a validation failure, the client will attempt to
3229 delete the uploaded object automatically. Supported values are
3230 "md5", "crc32c", "auto" and None. The default is "auto", which will
3231 try to detect if the C extension for crc32c is installed and fall
3232 back to md5 otherwise.
3233
3234 :type predefined_acl: str
3235 :param predefined_acl: (Optional) Predefined access control list
3236
3237 :type if_generation_match: long
3238 :param if_generation_match:
3239 (Optional) See :ref:`using-if-generation-match`
3240
3241 :type if_generation_not_match: long
3242 :param if_generation_not_match:
3243 (Optional) See :ref:`using-if-generation-not-match`
3244
3245 :type if_metageneration_match: long
3246 :param if_metageneration_match:
3247 (Optional) See :ref:`using-if-metageneration-match`
3248
3249 :type if_metageneration_not_match: long
3250 :param if_metageneration_not_match:
3251 (Optional) See :ref:`using-if-metageneration-not-match`
3252
3253 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3254 :param retry: (Optional) How to retry the RPC. A None value will disable
3255 retries. A google.api_core.retry.Retry value will enable retries,
3256 and the object will define retriable response codes and errors and
3257 configure backoff and timeout options.
3258
3259 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3260 Retry object and activates it only if certain conditions are met.
3261 This class exists to provide safe defaults for RPC calls that are
3262 not technically safe to retry normally (due to potential data
3263 duplication or other side-effects) but become safe to retry if a
3264 condition such as if_generation_match is set.
3265
3266 See the retry.py source code and docstrings in this package
3267 (google.cloud.storage.retry) for information on retry types and how
3268 to configure them.
3269
3270 :rtype: str
3271 :returns: The resumable upload session URL. The upload can be
3272 completed by making an HTTP PUT request with the
3273 file's contents.
3274
3275 :raises: :class:`google.cloud.exceptions.GoogleCloudError`
3276 if the session creation response returns an error status.
3277 """
3278 with create_trace_span(name="Storage.Blob.createResumableUploadSession"):
3279 # Handle ConditionalRetryPolicy.
3280 if isinstance(retry, ConditionalRetryPolicy):
3281 # Conditional retries are designed for non-media calls, which change
3282 # arguments into query_params dictionaries. Media operations work
3283 # differently, so here we make a "fake" query_params to feed to the
3284 # ConditionalRetryPolicy.
3285 query_params = {
3286 "ifGenerationMatch": if_generation_match,
3287 "ifMetagenerationMatch": if_metageneration_match,
3288 }
3289 retry = retry.get_retry_policy_if_conditions_met(
3290 query_params=query_params
3291 )
3292
3293 extra_headers = {}
3294 if origin is not None:
3295 # This header is specifically for client-side uploads; it
3296 # determines the origins allowed for CORS.
3297 extra_headers["Origin"] = origin
3298
3299 try:
3300 fake_stream = BytesIO(b"")
3301 # Send a fake chunk size that we **know** will be acceptable
3302 # to the `ResumableUpload` constructor. The chunk size only
3303 # matters when **sending** bytes to an upload.
3304 upload, _ = self._initiate_resumable_upload(
3305 client,
3306 fake_stream,
3307 content_type,
3308 size,
3309 predefined_acl=predefined_acl,
3310 if_generation_match=if_generation_match,
3311 if_generation_not_match=if_generation_not_match,
3312 if_metageneration_match=if_metageneration_match,
3313 if_metageneration_not_match=if_metageneration_not_match,
3314 extra_headers=extra_headers,
3315 chunk_size=self._CHUNK_SIZE_MULTIPLE,
3316 timeout=timeout,
3317 checksum=checksum,
3318 retry=retry,
3319 )
3320
3321 return upload.resumable_url
3322 except InvalidResponse as exc:
3323 _raise_from_invalid_response(exc)
3324
3325 def get_iam_policy(
3326 self,
3327 client=None,
3328 requested_policy_version=None,
3329 timeout=_DEFAULT_TIMEOUT,
3330 retry=DEFAULT_RETRY,
3331 ):
3332 """Retrieve the IAM policy for the object.
3333
3334 .. note::
3335
3336 Blob- / object-level IAM support does not yet exist; these methods
3337 currently call an internal ACL backend that provides no utility
3338 beyond the blob's :attr:`acl`. The API may be enhanced in the
3339 future and is currently undocumented. Use :attr:`acl` for managing
3340 object access control.
3341
3342 If :attr:`user_project` is set on the bucket, bills the API request
3343 to that project.
3344
3345 :type client: :class:`~google.cloud.storage.client.Client`
3346 :param client:
3347 (Optional) The client to use. If not passed, falls back to the
3348 ``client`` stored on the current object's bucket.
3349
3350 :type requested_policy_version: int or ``NoneType``
3351 :param requested_policy_version:
3352 (Optional) The version of IAM policies to request. If a policy
3353 with a condition is requested without setting this, the server will
3354 return an error. This must be set to a value of 3 to retrieve IAM
3355 policies containing conditions. This is to prevent client code that
3356 isn't aware of IAM conditions from interpreting and modifying
3357 policies incorrectly. The service might return a policy with
3358 version lower than the one that was requested, based on the feature
3359 syntax in the policy fetched.
3360
3361 :type timeout: float or tuple
3362 :param timeout:
3363 (Optional) The amount of time, in seconds, to wait
3364 for the server response. See: :ref:`configuring_timeouts`
3365
3366 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3367 :param retry:
3368 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3369
3370 :rtype: :class:`google.api_core.iam.Policy`
3371 :returns: the policy instance, based on the resource returned from
3372 the ``getIamPolicy`` API request.
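
Example (a minimal sketch; ``blob`` is an existing :class:`Blob`)::

    policy = blob.get_iam_policy(requested_policy_version=3)
    for binding in policy.bindings:
        print(binding["role"], binding["members"])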
3373 """
3374 with create_trace_span(name="Storage.Blob.getIamPolicy"):
3375 client = self._require_client(client)
3376
3377 query_params = {}
3378
3379 if self.user_project is not None:
3380 query_params["userProject"] = self.user_project
3381
3382 if requested_policy_version is not None:
3383 query_params["optionsRequestedPolicyVersion"] = requested_policy_version
3384
3385 info = client._get_resource(
3386 f"{self.path}/iam",
3387 query_params=query_params,
3388 timeout=timeout,
3389 retry=retry,
3390 _target_object=None,
3391 )
3392 return Policy.from_api_repr(info)
3393
3394 def set_iam_policy(
3395 self,
3396 policy,
3397 client=None,
3398 timeout=_DEFAULT_TIMEOUT,
3399 retry=DEFAULT_RETRY_IF_ETAG_IN_JSON,
3400 ):
3401 """Update the IAM policy for the bucket.
3402
3403 .. note::
3404
3405 Blob- / object-level IAM support does not yet exist; these methods
3406 currently call an internal ACL backend that provides no utility
3407 beyond the blob's :attr:`acl`. The API may be enhanced in the
3408 future and is currently undocumented. Use :attr:`acl` for managing
3409 object access control.
3410
3411 If :attr:`user_project` is set on the bucket, bills the API request
3412 to that project.
3413
3414 :type policy: :class:`google.api_core.iam.Policy`
3415 :param policy: policy instance used to update the object's IAM policy.
3416
3417 :type client: :class:`~google.cloud.storage.client.Client`
3418 :param client:
3419 (Optional) The client to use. If not passed, falls back to the
3420 ``client`` stored on the current object's bucket.
3421
3422 :type timeout: float or tuple
3423 :param timeout:
3424 (Optional) The amount of time, in seconds, to wait
3425 for the server response. See: :ref:`configuring_timeouts`
3426
3427 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3428 :param retry:
3429 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3430
3431 :rtype: :class:`google.api_core.iam.Policy`
3432 :returns: the policy instance, based on the resource returned from
3433 the ``setIamPolicy`` API request.
3434 """
3435 with create_trace_span(name="Storage.Blob.setIamPolicy"):
3436 client = self._require_client(client)
3437
3438 query_params = {}
3439
3440 if self.user_project is not None:
3441 query_params["userProject"] = self.user_project
3442
3443 path = f"{self.path}/iam"
3444 resource = policy.to_api_repr()
3445 resource["resourceId"] = self.path
3446 info = client._put_resource(
3447 path,
3448 resource,
3449 query_params=query_params,
3450 timeout=timeout,
3451 retry=retry,
3452 _target_object=None,
3453 )
3454 return Policy.from_api_repr(info)
3455
3456 def test_iam_permissions(
3457 self,
3458 permissions,
3459 client=None,
3460 timeout=_DEFAULT_TIMEOUT,
3461 retry=DEFAULT_RETRY,
3462 ):
3463 """API call: test permissions
3464
3465 .. note::
3466
3467 Blob- / object-level IAM support does not yet exist; these methods
3468 currently call an internal ACL backend that provides no utility
3469 beyond the blob's :attr:`acl`. The API may be enhanced in the
3470 future and is currently undocumented. Use :attr:`acl` for managing
3471 object access control.
3472
3473 If :attr:`user_project` is set on the bucket, bills the API request
3474 to that project.
3475
3476 :type permissions: list of string
3477 :param permissions: the permissions to check
3478
3479 :type client: :class:`~google.cloud.storage.client.Client`
3480 :param client:
3481 (Optional) The client to use. If not passed, falls back to the
3482 ``client`` stored on the current object's bucket.
3483
3484 :type timeout: float or tuple
3485 :param timeout:
3486 (Optional) The amount of time, in seconds, to wait
3487 for the server response. See: :ref:`configuring_timeouts`
3488
3489 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3490 :param retry:
3491 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3492
3493 :rtype: list of string
3494 :returns: the permissions returned by the ``testIamPermissions`` API
3495 request.
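
Example (a minimal sketch; the permission names shown are standard
Cloud Storage object permissions)::

    granted = blob.test_iam_permissions(
        ["storage.objects.get", "storage.objects.delete"]
    )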
3496 """
3497 with create_trace_span(name="Storage.Blob.testIamPermissions"):
3498 client = self._require_client(client)
3499 query_params = {"permissions": permissions}
3500
3501 if self.user_project is not None:
3502 query_params["userProject"] = self.user_project
3503
3504 path = f"{self.path}/iam/testPermissions"
3505 resp = client._get_resource(
3506 path,
3507 query_params=query_params,
3508 timeout=timeout,
3509 retry=retry,
3510 _target_object=None,
3511 )
3512
3513 return resp.get("permissions", [])
3514
3515 def make_public(
3516 self,
3517 client=None,
3518 timeout=_DEFAULT_TIMEOUT,
3519 if_generation_match=None,
3520 if_generation_not_match=None,
3521 if_metageneration_match=None,
3522 if_metageneration_not_match=None,
3523 retry=DEFAULT_RETRY,
3524 ):
3525 """Update blob's ACL, granting read access to anonymous users.
3526
3527 :type client: :class:`~google.cloud.storage.client.Client` or
3528 ``NoneType``
3529 :param client: (Optional) The client to use. If not passed, falls back
3530 to the ``client`` stored on the blob's bucket.
3531
3532 :type timeout: float or tuple
3533 :param timeout:
3534 (Optional) The amount of time, in seconds, to wait
3535 for the server response. See: :ref:`configuring_timeouts`
3536
3537 :type if_generation_match: long
3538 :param if_generation_match:
3539 (Optional) See :ref:`using-if-generation-match`
3540
3541 :type if_generation_not_match: long
3542 :param if_generation_not_match:
3543 (Optional) See :ref:`using-if-generation-not-match`
3544
3545 :type if_metageneration_match: long
3546 :param if_metageneration_match:
3547 (Optional) See :ref:`using-if-metageneration-match`
3548
3549 :type if_metageneration_not_match: long
3550 :param if_metageneration_not_match:
3551 (Optional) See :ref:`using-if-metageneration-not-match`
3552
3553 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3554 :param retry:
3555 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
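
Example (a minimal sketch; ``blob`` is an existing :class:`Blob`)::

    blob.make_public()
    print(blob.public_url)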
3556 """
3557 with create_trace_span(name="Storage.Blob.makePublic"):
3558 self.acl.all().grant_read()
3559 self.acl.save(
3560 client=client,
3561 timeout=timeout,
3562 if_generation_match=if_generation_match,
3563 if_generation_not_match=if_generation_not_match,
3564 if_metageneration_match=if_metageneration_match,
3565 if_metageneration_not_match=if_metageneration_not_match,
3566 retry=retry,
3567 )
3568
3569 def make_private(
3570 self,
3571 client=None,
3572 timeout=_DEFAULT_TIMEOUT,
3573 if_generation_match=None,
3574 if_generation_not_match=None,
3575 if_metageneration_match=None,
3576 if_metageneration_not_match=None,
3577 retry=DEFAULT_RETRY,
3578 ):
3579 """Update blob's ACL, revoking read access for anonymous users.
3580
3581 :type client: :class:`~google.cloud.storage.client.Client` or
3582 ``NoneType``
3583 :param client: (Optional) The client to use. If not passed, falls back
3584 to the ``client`` stored on the blob's bucket.
3585
3586 :type timeout: float or tuple
3587 :param timeout:
3588 (Optional) The amount of time, in seconds, to wait
3589 for the server response. See: :ref:`configuring_timeouts`
3590
3591 :type if_generation_match: long
3592 :param if_generation_match:
3593 (Optional) See :ref:`using-if-generation-match`
3594
3595 :type if_generation_not_match: long
3596 :param if_generation_not_match:
3597 (Optional) See :ref:`using-if-generation-not-match`
3598
3599 :type if_metageneration_match: long
3600 :param if_metageneration_match:
3601 (Optional) See :ref:`using-if-metageneration-match`
3602
3603 :type if_metageneration_not_match: long
3604 :param if_metageneration_not_match:
3605 (Optional) See :ref:`using-if-metageneration-not-match`
3606
3607 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3608 :param retry:
3609 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3610 """
3611 with create_trace_span(name="Storage.Blob.makePrivate"):
3612 self.acl.all().revoke_read()
3613 self.acl.save(
3614 client=client,
3615 timeout=timeout,
3616 if_generation_match=if_generation_match,
3617 if_generation_not_match=if_generation_not_match,
3618 if_metageneration_match=if_metageneration_match,
3619 if_metageneration_not_match=if_metageneration_not_match,
3620 retry=retry,
3621 )
3622
3623 def compose(
3624 self,
3625 sources,
3626 client=None,
3627 timeout=_DEFAULT_TIMEOUT,
3628 if_generation_match=None,
3629 if_metageneration_match=None,
3630 if_source_generation_match=None,
3631 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3632 ):
3633 """Concatenate source blobs into this one.
3634
3635 If :attr:`user_project` is set on the bucket, bills the API request
3636 to that project.
3637
3638 See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/objects/compose)
3639 and a [code sample](https://cloud.google.com/storage/docs/samples/storage-compose-file#storage_compose_file-python).
3640
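Example (an illustrative sketch; the two source objects are assumed to
already exist in ``bucket``)::

    destination = bucket.blob("composite.txt")
    destination.content_type = "text/plain"
    destination.compose(
        [bucket.blob("part-1.txt"), bucket.blob("part-2.txt")]
    )
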
3641 :type sources: list of :class:`Blob`
3642 :param sources: Blobs whose contents will be composed into this blob.
3643
3644 :type client: :class:`~google.cloud.storage.client.Client`
3645 :param client:
3646 (Optional) The client to use. If not passed, falls back to the
3647 ``client`` stored on the blob's bucket.
3648
3649 :type timeout: float or tuple
3650 :param timeout:
3651 (Optional) The amount of time, in seconds, to wait
3652 for the server response. See: :ref:`configuring_timeouts`
3653
3654 :type if_generation_match: long
3655 :param if_generation_match:
3656 (Optional) Makes the operation conditional on whether the
3657 destination object's current generation matches the given value.
3658 Setting to 0 makes the operation succeed only if there are no live
3659 versions of the object.
3660 Note: In a previous version, this argument worked identically to the
3661 ``if_source_generation_match`` argument. For
3662 backwards-compatibility reasons, if a list is passed in,
3663 this argument will behave like ``if_source_generation_match``
3664 and also issue a DeprecationWarning.
3665
3666 :type if_metageneration_match: long
3667 :param if_metageneration_match:
3668 (Optional) Makes the operation conditional on whether the
3669 destination object's current metageneration matches the given
3670 value.
3671
3672 If a list of long is passed in, no match operation will be
3673 performed. (Deprecated: passing a list of long is supported for
3674 backwards-compatibility reasons only.)
3675
3676 :type if_source_generation_match: list of long
3677 :param if_source_generation_match:
3678 (Optional) Makes the operation conditional on whether the current
3679 generation of each source blob matches the corresponding generation.
3680 The list must match ``sources`` item-to-item.
3681
3682 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3683 :param retry:
3684 (Optional) How to retry the RPC.
3685 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
3686 policy which will only enable retries if ``if_generation_match`` or ``generation``
3687 is set, in order to ensure requests are idempotent before retrying them.
3688 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
3689 to enable retries regardless of generation precondition setting.
3690 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
3691 """
3692 with create_trace_span(name="Storage.Blob.compose"):
3693 sources_len = len(sources)
3694 client = self._require_client(client)
3695 query_params = {}
3696
3697 if isinstance(if_generation_match, list):
3698 warnings.warn(
3699 _COMPOSE_IF_GENERATION_LIST_DEPRECATED,
3700 DeprecationWarning,
3701 stacklevel=2,
3702 )
3703
3704 if if_source_generation_match is not None:
3705 raise ValueError(
3706 _COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR
3707 )
3708
3709 if_source_generation_match = if_generation_match
3710 if_generation_match = None
3711
3712 if isinstance(if_metageneration_match, list):
3713 warnings.warn(
3714 _COMPOSE_IF_METAGENERATION_LIST_DEPRECATED,
3715 DeprecationWarning,
3716 stacklevel=2,
3717 )
3718
3719 if_metageneration_match = None
3720
3721 if if_source_generation_match is None:
3722 if_source_generation_match = [None] * sources_len
3723 if len(if_source_generation_match) != sources_len:
3724 raise ValueError(_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR)
3725
3726 source_objects = []
3727 for source, source_generation in zip(sources, if_source_generation_match):
3728 source_object = {
3729 "name": source.name,
3730 "generation": source.generation,
3731 }
3732
3733 preconditions = {}
3734 if source_generation is not None:
3735 preconditions["ifGenerationMatch"] = source_generation
3736
3737 if preconditions:
3738 source_object["objectPreconditions"] = preconditions
3739
3740 source_objects.append(source_object)
3741
3742 request = {
3743 "sourceObjects": source_objects,
3744 "destination": self._properties.copy(),
3745 }
3746
3747 if self.user_project is not None:
3748 query_params["userProject"] = self.user_project
3749
3750 _add_generation_match_parameters(
3751 query_params,
3752 if_generation_match=if_generation_match,
3753 if_metageneration_match=if_metageneration_match,
3754 )
3755
3756 api_response = client._post_resource(
3757 f"{self.path}/compose",
3758 request,
3759 query_params=query_params,
3760 timeout=timeout,
3761 retry=retry,
3762 _target_object=self,
3763 )
3764 self._set_properties(api_response)
3765
3766 def rewrite(
3767 self,
3768 source,
3769 token=None,
3770 client=None,
3771 if_generation_match=None,
3772 if_generation_not_match=None,
3773 if_metageneration_match=None,
3774 if_metageneration_not_match=None,
3775 if_source_generation_match=None,
3776 if_source_generation_not_match=None,
3777 if_source_metageneration_match=None,
3778 if_source_metageneration_not_match=None,
3779 timeout=_DEFAULT_TIMEOUT,
3780 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3781 ):
3782 """Rewrite source blob into this one.
3783
3784 If :attr:`user_project` is set on the bucket, bills the API request
3785 to that project.
3786
3787 .. note::
3788
3789 ``rewrite`` is not supported in a ``Batch`` context.
3790
3791 :type source: :class:`Blob`
3792 :param source: blob whose contents will be rewritten into this blob.
3793
3794 :type token: str
3795 :param token:
3796 (Optional) Token returned from an earlier, not-completed call to
3797 rewrite the same source blob. If passed, result will include
3798 updated status, total bytes written.
3799
3800 :type client: :class:`~google.cloud.storage.client.Client`
3801 :param client:
3802 (Optional) The client to use. If not passed, falls back to the
3803 ``client`` stored on the blob's bucket.
3804
3805 :type if_generation_match: long
3806 :param if_generation_match:
3807 (Optional) See :ref:`using-if-generation-match`
3808 Note that the generation to be matched is that of the
3809 ``destination`` blob.
3810
3811 :type if_generation_not_match: long
3812 :param if_generation_not_match:
3813 (Optional) See :ref:`using-if-generation-not-match`
3814 Note that the generation to be matched is that of the
3815 ``destination`` blob.
3816
3817 :type if_metageneration_match: long
3818 :param if_metageneration_match:
3819 (Optional) See :ref:`using-if-metageneration-match`
3820 Note that the metageneration to be matched is that of the
3821 ``destination`` blob.
3822
3823 :type if_metageneration_not_match: long
3824 :param if_metageneration_not_match:
3825 (Optional) See :ref:`using-if-metageneration-not-match`
3826 Note that the metageneration to be matched is that of the
3827 ``destination`` blob.
3828
3829 :type if_source_generation_match: long
3830 :param if_source_generation_match:
3831 (Optional) Makes the operation conditional on whether the source
3832 object's generation matches the given value.
3833
3834 :type if_source_generation_not_match: long
3835 :param if_source_generation_not_match:
3836 (Optional) Makes the operation conditional on whether the source
3837 object's generation does not match the given value.
3838
3839 :type if_source_metageneration_match: long
3840 :param if_source_metageneration_match:
3841 (Optional) Makes the operation conditional on whether the source
3842 object's current metageneration matches the given value.
3843
3844 :type if_source_metageneration_not_match: long
3845 :param if_source_metageneration_not_match:
3846 (Optional) Makes the operation conditional on whether the source
3847 object's current metageneration does not match the given value.
3848
3849 :type timeout: float or tuple
3850 :param timeout:
3851 (Optional) The amount of time, in seconds, to wait
3852 for the server response. See: :ref:`configuring_timeouts`
3853
3854 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3855 :param retry:
3856 (Optional) How to retry the RPC.
3857 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
3858 policy which will only enable retries if ``if_generation_match`` or ``generation``
3859 is set, in order to ensure requests are idempotent before retrying them.
3860 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
3861 to enable retries regardless of generation precondition setting.
3862 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
3863
3864 :rtype: tuple
3865 :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
3866 is a rewrite token (``None`` if the rewrite is complete),
3867 ``bytes_rewritten`` is the number of bytes rewritten so far,
3868 and ``total_bytes`` is the total number of bytes to be
3869 rewritten.
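
Example (a sketch of driving the rewrite loop to completion;
``source_blob`` and ``dest_blob`` are assumed to exist)::

    token, bytes_rewritten, total_bytes = dest_blob.rewrite(source_blob)
    while token is not None:
        token, bytes_rewritten, total_bytes = dest_blob.rewrite(
            source_blob, token=token
        )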
3870 """
3871 with create_trace_span(name="Storage.Blob.rewrite"):
3872 client = self._require_client(client)
3873 headers = _get_encryption_headers(self._encryption_key)
3874 headers.update(_get_encryption_headers(source._encryption_key, source=True))
3875
3876 query_params = self._query_params
3877 if "generation" in query_params:
3878 del query_params["generation"]
3879
3880 if token:
3881 query_params["rewriteToken"] = token
3882
3883 if source.generation:
3884 query_params["sourceGeneration"] = source.generation
3885
3886 # When a Customer-Managed Encryption Key is used to encrypt a Cloud Storage object
3887 # at rest, the object resource metadata will store the version of the Key Management
3888 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
3889 # used to rewrite the object, then the existing kmsKeyName version
3890 # value can't be used in the rewrite request and the client instead ignores it.
3891 if (
3892 self.kms_key_name is not None
3893 and "cryptoKeyVersions" not in self.kms_key_name
3894 ):
3895 query_params["destinationKmsKeyName"] = self.kms_key_name
3896
3897 _add_generation_match_parameters(
3898 query_params,
3899 if_generation_match=if_generation_match,
3900 if_generation_not_match=if_generation_not_match,
3901 if_metageneration_match=if_metageneration_match,
3902 if_metageneration_not_match=if_metageneration_not_match,
3903 if_source_generation_match=if_source_generation_match,
3904 if_source_generation_not_match=if_source_generation_not_match,
3905 if_source_metageneration_match=if_source_metageneration_match,
3906 if_source_metageneration_not_match=if_source_metageneration_not_match,
3907 )
3908
3909 path = f"{source.path}/rewriteTo{self.path}"
3910 api_response = client._post_resource(
3911 path,
3912 self._properties,
3913 query_params=query_params,
3914 headers=headers,
3915 timeout=timeout,
3916 retry=retry,
3917 _target_object=self,
3918 )
3919 rewritten = int(api_response["totalBytesRewritten"])
3920 size = int(api_response["objectSize"])
3921
3922 # The "resource" key is set if and only if the rewrite is
3923 # complete. Additionally, there is no rewrite token to return
3924 # in this case.
3925 if api_response["done"]:
3926 self._set_properties(api_response["resource"])
3927 return None, rewritten, size
3928
3929 return api_response["rewriteToken"], rewritten, size
3930
3931 def update_storage_class(
3932 self,
3933 new_class,
3934 client=None,
3935 if_generation_match=None,
3936 if_generation_not_match=None,
3937 if_metageneration_match=None,
3938 if_metageneration_not_match=None,
3939 if_source_generation_match=None,
3940 if_source_generation_not_match=None,
3941 if_source_metageneration_match=None,
3942 if_source_metageneration_not_match=None,
3943 timeout=_DEFAULT_TIMEOUT,
3944 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3945 ):
3946 """Update blob's storage class via a rewrite-in-place. This helper will
3947 wait for the rewrite to complete before returning, so it may take some
3948 time for large files.
3949
3950 See
3951 https://cloud.google.com/storage/docs/per-object-storage-class
3952
3953 If :attr:`user_project` is set on the bucket, bills the API request
3954 to that project.
3955
3956 :type new_class: str
3957 :param new_class:
3958 new storage class for the object. One of:
3959 :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
3960 :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
3961 :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
3962 :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
3963 :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
3964 or
3965 :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`.
3966
3967 :type client: :class:`~google.cloud.storage.client.Client`
3968 :param client:
3969 (Optional) The client to use. If not passed, falls back to the
3970 ``client`` stored on the blob's bucket.
3971
3972 :type if_generation_match: long
3973 :param if_generation_match:
3974 (Optional) See :ref:`using-if-generation-match`
3975 Note that the generation to be matched is that of the
3976 ``destination`` blob.
3977
3978 :type if_generation_not_match: long
3979 :param if_generation_not_match:
3980 (Optional) See :ref:`using-if-generation-not-match`
3981 Note that the generation to be matched is that of the
3982 ``destination`` blob.
3983
3984 :type if_metageneration_match: long
3985 :param if_metageneration_match:
3986 (Optional) See :ref:`using-if-metageneration-match`
3987 Note that the metageneration to be matched is that of the
3988 ``destination`` blob.
3989
3990 :type if_metageneration_not_match: long
3991 :param if_metageneration_not_match:
3992 (Optional) See :ref:`using-if-metageneration-not-match`
3993 Note that the metageneration to be matched is that of the
3994 ``destination`` blob.
3995
3996 :type if_source_generation_match: long
3997 :param if_source_generation_match:
3998 (Optional) Makes the operation conditional on whether the source
3999 object's generation matches the given value.
4000
4001 :type if_source_generation_not_match: long
4002 :param if_source_generation_not_match:
4003 (Optional) Makes the operation conditional on whether the source
4004 object's generation does not match the given value.
4005
4006 :type if_source_metageneration_match: long
4007 :param if_source_metageneration_match:
4008 (Optional) Makes the operation conditional on whether the source
4009 object's current metageneration matches the given value.
4010
4011 :type if_source_metageneration_not_match: long
4012 :param if_source_metageneration_not_match:
4013 (Optional) Makes the operation conditional on whether the source
4014 object's current metageneration does not match the given value.
4015
4016 :type timeout: float or tuple
4017 :param timeout:
4018 (Optional) The amount of time, in seconds, to wait
4019 for the server response. See: :ref:`configuring_timeouts`
4020
4021 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4022 :param retry:
4023 (Optional) How to retry the RPC.
4024 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
4025 policy which will only enable retries if ``if_generation_match`` or ``generation``
4026 is set, in order to ensure requests are idempotent before retrying them.
4027 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
4028 to enable retries regardless of generation precondition setting.
4029 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
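
Example (a minimal sketch)::

    from google.cloud.storage import constants

    blob.update_storage_class(constants.NEARLINE_STORAGE_CLASS)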
4030 """
4031 with create_trace_span(name="Storage.Blob.updateStorageClass"):
4032 # Update current blob's storage class prior to rewrite
4033 self._patch_property("storageClass", new_class)
4034
4035 # Execute consecutive rewrite operations until operation is done
4036 token, _, _ = self.rewrite(
4037 self,
4038 if_generation_match=if_generation_match,
4039 if_generation_not_match=if_generation_not_match,
4040 if_metageneration_match=if_metageneration_match,
4041 if_metageneration_not_match=if_metageneration_not_match,
4042 if_source_generation_match=if_source_generation_match,
4043 if_source_generation_not_match=if_source_generation_not_match,
4044 if_source_metageneration_match=if_source_metageneration_match,
4045 if_source_metageneration_not_match=if_source_metageneration_not_match,
4046 timeout=timeout,
4047 retry=retry,
4048 )
4049 while token is not None:
4050 token, _, _ = self.rewrite(
4051 self,
4052 token=token,
4053 if_generation_match=if_generation_match,
4054 if_generation_not_match=if_generation_not_match,
4055 if_metageneration_match=if_metageneration_match,
4056 if_metageneration_not_match=if_metageneration_not_match,
4057 if_source_generation_match=if_source_generation_match,
4058 if_source_generation_not_match=if_source_generation_not_match,
4059 if_source_metageneration_match=if_source_metageneration_match,
4060 if_source_metageneration_not_match=if_source_metageneration_not_match,
4061 timeout=timeout,
4062 retry=retry,
4063 )
4064
4065 def open(
4066 self,
4067 mode="r",
4068 chunk_size=None,
4069 ignore_flush=None,
4070 encoding=None,
4071 errors=None,
4072 newline=None,
4073 **kwargs,
4074 ):
4075 r"""Create a file handler for file-like I/O to or from this blob.
4076
4077 This method can be used as a context manager, just like Python's
4078 built-in 'open()' function.
4079
4080 While reading, as with other read methods, if blob.generation is not set,
4081 the most recent blob generation will be used. Because the file-like IO
4082 reader downloads progressively in chunks, this could result in data from
4083 multiple versions being mixed together. If this is a concern, use
4084 either bucket.get_blob() or blob.reload(), which will download the
4085 latest generation number and set it; or, if the generation is known, set
4086 it manually, for instance with bucket.blob(generation=123456).
4087
4088 Checksumming (hashing) to verify data integrity is disabled for reads
4089 using this feature because reads are implemented using request ranges,
4090 which do not provide checksums to validate. See
4091 https://cloud.google.com/storage/docs/hashes-etags for details.
4092
4093 See a [code sample](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_fileio_write_read.py).
4094
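Example (a minimal sketch; ``blob`` is an existing :class:`Blob`)::

    with blob.open("w") as f:
        f.write("Hello, world.")

    with blob.open("r") as f:
        print(f.read())
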
4095 Keyword arguments to pass to the underlying API calls.
4096 For both uploads and downloads, the following arguments are
4097 supported:
4098
4099 - ``if_generation_match``
4100 - ``if_generation_not_match``
4101 - ``if_metageneration_match``
4102 - ``if_metageneration_not_match``
4103 - ``timeout``
4104 - ``retry``
4105
4106 For downloads only, the following additional arguments are supported:
4107
4108 - ``raw_download``
4109 - ``single_shot_download``
4110
4111 For uploads only, the following additional arguments are supported:
4112
4113 - ``content_type``
4114 - ``predefined_acl``
4115 - ``checksum``
4116
4117 :type mode: str
4118 :param mode:
4119 (Optional) A mode string, as per standard Python `open()` semantics. The first
4120 character must be 'r', to open the blob for reading, or 'w' to open
4121 it for writing. The second character, if present, must be 't' for
4122 (unicode) text mode, or 'b' for bytes mode. If the second character
4123 is omitted, text mode is the default.
4124
4125 :type chunk_size: long
4126 :param chunk_size:
4127 (Optional) For reads, the minimum number of bytes to read at a time.
4128 If fewer bytes than the chunk_size are requested, the remainder is
4129 buffered. For writes, the maximum number of bytes to buffer before
4130 sending data to the server, and the size of each request when data
4131 is sent. Writes are implemented as a "resumable upload", so
4132 chunk_size for writes must be exactly a multiple of 256KiB as with
4133 other resumable uploads. The default is 40 MiB.
4134
4135 :type ignore_flush: bool
4136 :param ignore_flush:
4137 (Optional) For non-text-mode writes, makes flush() do nothing
4138 instead of raising an error. flush() without closing is not
4139 supported by the remote service, so calling it normally
4140 raises io.UnsupportedOperation. However, that behavior is
4141 incompatible with some consumers and wrappers of file objects in
4142 Python, such as zipfile.ZipFile or io.TextIOWrapper. Setting
4143 ignore_flush will cause flush() to successfully do nothing, for
4144 compatibility with those contexts. The correct way to actually flush
4145 data to the remote server is to close() (using a context manager,
4146 such as in the example, will cause this to happen automatically).
4147
4148 :type encoding: str
4149 :param encoding:
4150 (Optional) For text mode only, the name of the encoding that the stream will
4151 be decoded or encoded with. If omitted, it defaults to
4152 locale.getpreferredencoding(False).
4153
4154 :type errors: str
4155 :param errors:
4156 (Optional) For text mode only, an optional string that specifies how encoding
4157 and decoding errors are to be handled. Pass 'strict' to raise a
4158 ValueError exception if there is an encoding error (the default of
4159 None has the same effect), or pass 'ignore' to ignore errors. (Note
4160 that ignoring encoding errors can lead to data loss.) Other more
4161 rarely-used options are also available; see the Python 'io' module
4162 documentation for 'io.TextIOWrapper' for a complete list.
4163
4164 :type newline: str
4165 :param newline:
4166 (Optional) For text mode only, controls how line endings are handled. It can
4167 be None, '', '\n', '\r', and '\r\n'. If None, reads use "universal
4168 newline mode" and writes use the system default. See the Python
4169 'io' module documentation for 'io.TextIOWrapper' for details.
4170
4171 :returns: A 'BlobReader' or 'BlobWriter' from
4172 'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one
4173 of those classes, depending on the 'mode' argument.
4174 """
4175 with create_trace_span(name="Storage.Blob.open"):
4176 if mode == "rb":
4177 if encoding or errors or newline:
4178 raise ValueError(
4179 "encoding, errors and newline arguments are for text mode only"
4180 )
4181 if ignore_flush:
4182 raise ValueError(
4183 "ignore_flush argument is for non-text write mode only"
4184 )
4185 return BlobReader(self, chunk_size=chunk_size, **kwargs)
4186 elif mode == "wb":
4187 if encoding or errors or newline:
4188 raise ValueError(
4189 "encoding, errors and newline arguments are for text mode only"
4190 )
4191 return BlobWriter(
4192 self,
4193 chunk_size=chunk_size,
4194 ignore_flush=ignore_flush,
4195 **kwargs,
4196 )
4197 elif mode in ("r", "rt"):
4198 if ignore_flush:
4199 raise ValueError(
4200 "ignore_flush argument is for non-text write mode only"
4201 )
4202 return TextIOWrapper(
4203 BlobReader(self, chunk_size=chunk_size, **kwargs),
4204 encoding=encoding,
4205 errors=errors,
4206 newline=newline,
4207 )
4208 elif mode in ("w", "wt"):
4209 if ignore_flush is False:
4210 raise ValueError(
4211 "ignore_flush is required for text mode writing and "
4212 "cannot be set to False"
4213 )
4214 return TextIOWrapper(
4215 BlobWriter(
4216 self, chunk_size=chunk_size, ignore_flush=True, **kwargs
4217 ),
4218 encoding=encoding,
4219 errors=errors,
4220 newline=newline,
4221 )
4222 else:
4223 raise NotImplementedError(
4224 "Supported modes strings are 'r', 'rb', 'rt', 'w', 'wb', and 'wt' only."
4225 )
4226
4227 cache_control = _scalar_property("cacheControl")
4228 """HTTP 'Cache-Control' header for this object.
4229
4230 See [`RFC 7234`](https://tools.ietf.org/html/rfc7234#section-5.2)
4231 and [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4232
4233 :rtype: str or ``NoneType``
4234
4235 """
4236
4237 content_disposition = _scalar_property("contentDisposition")
4238 """HTTP 'Content-Disposition' header for this object.
4239
4240 See [`RFC 6266`](https://tools.ietf.org/html/rfc6266) and
4241 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4242
4243 :rtype: str or ``NoneType``
4244 """
4245
4246 content_encoding = _scalar_property("contentEncoding")
4247 """HTTP 'Content-Encoding' header for this object.
4248
4249 See [`RFC 7231`](https://tools.ietf.org/html/rfc7231#section-3.1.2.2) and
4250 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4251
4252 :rtype: str or ``NoneType``
4253 """
4254
4255 content_language = _scalar_property("contentLanguage")
4256 """HTTP 'Content-Language' header for this object.
4257
4258 See [`BCP47`](https://tools.ietf.org/html/bcp47) and
4259 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4260
4261 :rtype: str or ``NoneType``
4262 """
4263
4264 content_type = _scalar_property(_CONTENT_TYPE_FIELD)
4265 """HTTP 'Content-Type' header for this object.
4266
4267 See [`RFC 2616`](https://tools.ietf.org/html/rfc2616#section-14.17) and
4268 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4269
4270 :rtype: str or ``NoneType``
4271 """
4272
4273 crc32c = _scalar_property("crc32c")
4274 """CRC32C checksum for this object.
4275
4276 This returns the blob's CRC32C checksum. To retrieve the value, first use a
4277 reload method of the Blob class, which loads the blob's properties from the server.
4278
4279 See [`RFC 4960`](https://tools.ietf.org/html/rfc4960#appendix-B) and
4280 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4281
4282 If not set before upload, the server will compute the hash.
4283
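Example (a minimal sketch)::

    blob.reload()  # fetch the object's properties from the server
    print(blob.crc32c)
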
4284 :rtype: str or ``NoneType``
4285 """
4286
4287 def _prep_and_do_download(
4288 self,
4289 file_obj,
4290 client=None,
4291 start=None,
4292 end=None,
4293 raw_download=False,
4294 if_etag_match=None,
4295 if_etag_not_match=None,
4296 if_generation_match=None,
4297 if_generation_not_match=None,
4298 if_metageneration_match=None,
4299 if_metageneration_not_match=None,
4300 timeout=_DEFAULT_TIMEOUT,
4301 checksum="auto",
4302 retry=DEFAULT_RETRY,
4303 single_shot_download=False,
4304 command=None,
4305 ):
4306 """Download the contents of a blob object into a file-like object.
4307
4308 See https://cloud.google.com/storage/docs/downloading-objects
4309
4310 If :attr:`user_project` is set on the bucket, bills the API request
4311 to that project.
4312
4313 :type file_obj: file
4314 :param file_obj: A file handle to which to write the blob's data.
4315
4316 :type client: :class:`~google.cloud.storage.client.Client`
4317 :param client:
4318 (Optional) The client to use. If not passed, falls back to the
4319 ``client`` stored on the blob's bucket.
4320
4321 :type start: int
4322 :param start: (Optional) The first byte in a range to be downloaded.
4323
4324 :type end: int
4325 :param end: (Optional) The last byte in a range to be downloaded.
4326
4327 :type raw_download: bool
4328 :param raw_download:
4329 (Optional) If true, download the object without any expansion.
4330
4331 :type if_etag_match: Union[str, Set[str]]
4332 :param if_etag_match:
4333 (Optional) See :ref:`using-if-etag-match`
4334
4335 :type if_etag_not_match: Union[str, Set[str]]
4336 :param if_etag_not_match:
4337 (Optional) See :ref:`using-if-etag-not-match`
4338
4339 :type if_generation_match: long
4340 :param if_generation_match:
4341 (Optional) See :ref:`using-if-generation-match`
4342
4343 :type if_generation_not_match: long
4344 :param if_generation_not_match:
4345 (Optional) See :ref:`using-if-generation-not-match`
4346
4347 :type if_metageneration_match: long
4348 :param if_metageneration_match:
4349 (Optional) See :ref:`using-if-metageneration-match`
4350
4351 :type if_metageneration_not_match: long
4352 :param if_metageneration_not_match:
4353 (Optional) See :ref:`using-if-metageneration-not-match`
4354
4355 :type timeout: float or tuple
4356 :param timeout:
4357 (Optional) The amount of time, in seconds, to wait
4358 for the server response. See: :ref:`configuring_timeouts`
4359
4360 :type checksum: str
4361 :param checksum:
4362 (Optional) The type of checksum to compute to verify the integrity
4363 of the object. The response headers must contain a checksum of the
4364 requested type. If the headers lack an appropriate checksum (for
4365 instance in the case of transcoded or ranged downloads where the
4366 remote service does not know the correct checksum, including
4367 downloads where chunk_size is set) an INFO-level log will be
4368 emitted. Supported values are "md5", "crc32c", "auto" and None. The
4369 default is "auto", which will try to detect if the C extension for
4370 crc32c is installed and fall back to md5 otherwise.
4371
4372 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4373 :param retry: (Optional) How to retry the RPC. A None value will disable
4374 retries. A google.api_core.retry.Retry value will enable retries,
4375 and the object will define retriable response codes and errors and
4376 configure backoff and timeout options.
4377
4378 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
4379 Retry object and activates it only if certain conditions are met.
4380 This class exists to provide safe defaults for RPC calls that are
4381 not technically safe to retry normally (due to potential data
4382 duplication or other side-effects) but become safe to retry if a
4383 condition such as if_metageneration_match is set.
4384
4385 See the retry.py source code and docstrings in this package
4386 (google.cloud.storage.retry) for information on retry types and how
4387 to configure them.
4388
4389 :type single_shot_download: bool
4390 :param single_shot_download:
4391 (Optional) If true, download the object in a single request.
4392 Caution: Enabling this will increase the memory overhead for your
4393 application. Enable it only if it suits your use case.
4394
4395 :type command: str
4396 :param command:
4397 (Optional) Information about which interface for download was used,
4398 to be included in the X-Goog-API-Client header. Please leave as None
4399 unless otherwise directed.
4400 """
4401 # Handle ConditionalRetryPolicy.
4402 if isinstance(retry, ConditionalRetryPolicy):
4403 # Conditional retries are designed for non-media calls, which change
4404 # arguments into query_params dictionaries. Media operations work
4405 # differently, so here we make a "fake" query_params to feed to the
4406 # ConditionalRetryPolicy.
4407 query_params = {
4408 "ifGenerationMatch": if_generation_match,
4409 "ifMetagenerationMatch": if_metageneration_match,
4410 }
4411 retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
4412
4413 client = self._require_client(client)
4414
4415 download_url = self._get_download_url(
4416 client,
4417 if_generation_match=if_generation_match,
4418 if_generation_not_match=if_generation_not_match,
4419 if_metageneration_match=if_metageneration_match,
4420 if_metageneration_not_match=if_metageneration_not_match,
4421 )
4422 headers = _get_encryption_headers(self._encryption_key)
4423 headers["accept-encoding"] = "gzip"
4424 _add_etag_match_headers(
4425 headers,
4426 if_etag_match=if_etag_match,
4427 if_etag_not_match=if_etag_not_match,
4428 )
4429 # Add any client attached custom headers to be sent with the request.
4430 headers = {
4431 **_get_default_headers(client._connection.user_agent, command=command),
4432 **headers,
4433 **client._extra_headers,
4434 }
4435
4436 transport = client._http
4437
4438 try:
4439 self._do_download(
4440 transport,
4441 file_obj,
4442 download_url,
4443 headers,
4444 start,
4445 end,
4446 raw_download,
4447 timeout=timeout,
4448 checksum=checksum,
4449 retry=retry,
4450 single_shot_download=single_shot_download,
4451 )
4452 except InvalidResponse as exc:
4453 _raise_from_invalid_response(exc)
4454
4455 @property
4456 def component_count(self):
4457 """Number of underlying components that make up this object.
4458
4459 See https://cloud.google.com/storage/docs/json_api/v1/objects
4460
4461 :rtype: int or ``NoneType``
4462 :returns: The component count (in case of a composed object) or
4463 ``None`` if the blob's resource has not been loaded from
4464 the server. This property will not be set on objects
4465 not created via ``compose``.
4466 """
4467 component_count = self._properties.get("componentCount")
4468 if component_count is not None:
4469 return int(component_count)
4470
4471 @property
4472 def etag(self):
4473 """Retrieve the ETag for the object.
4474
4475 See [`RFC 2616 (etags)`](https://tools.ietf.org/html/rfc2616#section-3.11) and
4476 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4477
4478 :rtype: str or ``NoneType``
4479 :returns: The blob etag or ``None`` if the blob's resource has not
4480 been loaded from the server.
4481 """
4482 return self._properties.get("etag")
4483
4484 event_based_hold = _scalar_property("eventBasedHold")
4485 """Is an event-based hold active on the object?
4486
4487 See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4488
4489 If the property is not set locally, returns :data:`None`.
4490
4491 :rtype: bool or ``NoneType``
4492 """
4493
4494 @property
4495 def generation(self):
4496 """Retrieve the generation for the object.
4497
4498 See https://cloud.google.com/storage/docs/json_api/v1/objects
4499
4500 :rtype: int or ``NoneType``
4501 :returns: The generation of the blob or ``None`` if the blob's
4502 resource has not been loaded from the server.
4503 """
4504 generation = self._properties.get("generation")
4505 if generation is not None:
4506 return int(generation)
4507
4508 @property
4509 def id(self):
4510 """Retrieve the ID for the object.
4511
4512 See https://cloud.google.com/storage/docs/json_api/v1/objects
4513
4514 The ID consists of the bucket name, object name, and generation number.
4515
4516 :rtype: str or ``NoneType``
4517 :returns: The ID of the blob or ``None`` if the blob's
4518 resource has not been loaded from the server.
4519 """
4520 return self._properties.get("id")
4521
4522 md5_hash = _scalar_property("md5Hash")
4523 """MD5 hash for this object.
4524
4525 This returns the blob's MD5 hash. To retrieve the value, first use a
4526 reload method of the Blob class, which loads the blob's properties from the server.
4527
4528 See [`RFC 1321`](https://tools.ietf.org/html/rfc1321) and
4529 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4530
4531 If not set before upload, the server will compute the hash.
4532
4533 :rtype: str or ``NoneType``
4534 """
4535
4536 @property
4537 def media_link(self):
4538 """Retrieve the media download URI for the object.
4539
4540 See https://cloud.google.com/storage/docs/json_api/v1/objects
4541
4542 :rtype: str or ``NoneType``
4543 :returns: The media link for the blob or ``None`` if the blob's
4544 resource has not been loaded from the server.
4545 """
4546 return self._properties.get("mediaLink")
4547
4548 @property
4549 def metadata(self):
4550 """Retrieve arbitrary/application specific metadata for the object.
4551
4552 See https://cloud.google.com/storage/docs/json_api/v1/objects
4553
4554 :setter: Update arbitrary/application specific metadata for the
4555 object.
4556 :getter: Retrieve arbitrary/application specific metadata for
4557 the object.
4558
4559 :rtype: dict or ``NoneType``
4560 :returns: The metadata associated with the blob or ``None`` if the
4561 property is not set.
4562 """
4563 return copy.deepcopy(self._properties.get("metadata"))
4564
4565 @metadata.setter
4566 def metadata(self, value):
4567 """Update arbitrary/application specific metadata for the object.
4568
4569 Values are stored to GCS as strings. To delete a key, set its value to
4570 None and call blob.patch().
4571
4572 See https://cloud.google.com/storage/docs/json_api/v1/objects
4573
4574 :type value: dict
4575 :param value: The blob metadata to set.
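
Example (a minimal sketch; to delete a key, set its value to None and
then call :meth:`patch`)::

    blob.metadata = {"color": "blue"}
    blob.patch()
    blob.metadata = {"color": None}  # removes "color" on the next patch()
    blob.patch()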
4576 """
4577 if value is not None:
4578 value = {k: str(v) if v is not None else None for k, v in value.items()}
4579 self._patch_property("metadata", value)
4580
4581 @property
4582 def metageneration(self):
4583 """Retrieve the metageneration for the object.
4584
4585 See https://cloud.google.com/storage/docs/json_api/v1/objects
4586
4587 :rtype: int or ``NoneType``
4588 :returns: The metageneration of the blob or ``None`` if the blob's
4589 resource has not been loaded from the server.
4590 """
4591 metageneration = self._properties.get("metageneration")
4592 if metageneration is not None:
4593 return int(metageneration)
4594
4595 @property
4596 def owner(self):
4597 """Retrieve info about the owner of the object.
4598
4599 See https://cloud.google.com/storage/docs/json_api/v1/objects
4600
4601 :rtype: dict or ``NoneType``
4602 :returns: Mapping of owner's role/ID, or ``None`` if the blob's
4603 resource has not been loaded from the server.
4604 """
4605 return copy.deepcopy(self._properties.get("owner"))
4606
4607 @property
4608 def retention_expiration_time(self):
4609 """Retrieve timestamp at which the object's retention period expires.
4610
4611 See https://cloud.google.com/storage/docs/json_api/v1/objects
4612
4613 :rtype: :class:`datetime.datetime` or ``NoneType``
4614 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4615 ``None`` if the property is not set locally.
4616 """
4617 value = self._properties.get("retentionExpirationTime")
4618 if value is not None:
4619 return _rfc3339_nanos_to_datetime(value)
4620
4621 @property
4622 def self_link(self):
4623 """Retrieve the URI for the object.
4624
4625 See https://cloud.google.com/storage/docs/json_api/v1/objects
4626
4627 :rtype: str or ``NoneType``
4628 :returns: The self link for the blob or ``None`` if the blob's
4629 resource has not been loaded from the server.
4630 """
4631 return self._properties.get("selfLink")
4632
4633 @property
4634 def size(self):
4635 """Size of the object, in bytes.
4636
4637 See https://cloud.google.com/storage/docs/json_api/v1/objects
4638
4639 :rtype: int or ``NoneType``
4640 :returns: The size of the blob or ``None`` if the blob's
4641 resource has not been loaded from the server.
4642 """
4643 size = self._properties.get("size")
4644 if size is not None:
4645 return int(size)
4646
4647 @property
4648 def kms_key_name(self):
4649 """Resource name of Cloud KMS key used to encrypt the blob's contents.
4650
4651 :rtype: str or ``NoneType``
4652 :returns:
4653 The resource name or ``None`` if no Cloud KMS key was used,
4654 or the blob's resource has not been loaded from the server.
4655 """
4656 return self._properties.get("kmsKeyName")
4657
4658 @kms_key_name.setter
4659 def kms_key_name(self, value):
4660 """Set KMS encryption key for object.
4661
4662 :type value: str or ``NoneType``
4663 :param value: new KMS key name (None to clear any existing key).
4664 """
4665 self._patch_property("kmsKeyName", value)
4666
4667 storage_class = _scalar_property("storageClass")
4668 """Retrieve the storage class for the object.
4669
4670 This can only be set at blob / object **creation** time. If you'd
4671 like to change the storage class **after** the blob / object already
4672 exists in a bucket, call :meth:`update_storage_class` (which uses
4673 :meth:`rewrite`).
4674
4675 See https://cloud.google.com/storage/docs/storage-classes
4676
4677 :rtype: str or ``NoneType``
4678 :returns:
4679 If set, one of
4680 :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
4681 :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
4682 :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
4683 :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
4684 :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
4685 :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`,
4686 :attr:`~google.cloud.storage.constants.DURABLE_REDUCED_AVAILABILITY_STORAGE_CLASS`,
4687 else ``None``.
4688 """
4689
4690 temporary_hold = _scalar_property("temporaryHold")
4691 """Is a temporary hold active on the object?
4692
4693 See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4694
4695 If the property is not set locally, returns :data:`None`.
4696
4697 :rtype: bool or ``NoneType``
4698 """
4699
4700 @property
4701 def time_deleted(self):
4702 """Retrieve the timestamp at which the object was deleted.
4703
4704 See https://cloud.google.com/storage/docs/json_api/v1/objects
4705
4706 :rtype: :class:`datetime.datetime` or ``NoneType``
4707 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4708 ``None`` if the blob's resource has not been loaded from
4709 the server (see :meth:`reload`). If the blob has
4710 not been deleted, this will never be set.
4711 """
4712 value = self._properties.get("timeDeleted")
4713 if value is not None:
4714 return _rfc3339_nanos_to_datetime(value)
4715
4716 @property
4717 def time_created(self):
4718 """Retrieve the timestamp at which the object was created.
4719
4720 See https://cloud.google.com/storage/docs/json_api/v1/objects
4721
4722 :rtype: :class:`datetime.datetime` or ``NoneType``
4723 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4724 ``None`` if the blob's resource has not been loaded from
4725 the server (see :meth:`reload`).
4726 """
4727 value = self._properties.get("timeCreated")
4728 if value is not None:
4729 return _rfc3339_nanos_to_datetime(value)
4730
4731 @property
4732 def updated(self):
4733 """Retrieve the timestamp at which the object was updated.
4734
4735 See https://cloud.google.com/storage/docs/json_api/v1/objects
4736
4737 :rtype: :class:`datetime.datetime` or ``NoneType``
4738 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4739 ``None`` if the blob's resource has not been loaded from
4740 the server (see :meth:`reload`).
4741 """
4742 value = self._properties.get("updated")
4743 if value is not None:
4744 return _rfc3339_nanos_to_datetime(value)
4745
4746 @property
4747 def custom_time(self):
4748 """Retrieve the custom time for the object.
4749
4750 See https://cloud.google.com/storage/docs/json_api/v1/objects
4751
4752 :rtype: :class:`datetime.datetime` or ``NoneType``
4753 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4754 ``None`` if the blob's resource has not been loaded from
4755 the server (see :meth:`reload`).
4756 """
4757 value = self._properties.get("customTime")
4758 if value is not None:
4759 return _rfc3339_nanos_to_datetime(value)
4760
4761 @custom_time.setter
4762 def custom_time(self, value):
4763 """Set the custom time for the object.
4764
        Once set on the server-side object, this value can't be unset; it may
        only be changed to a custom datetime in the future.
4767
4768 If :attr:`custom_time` must be unset, either perform a rewrite
4769 operation or upload the data again.
4770
4771 See https://cloud.google.com/storage/docs/json_api/v1/objects
4772
4773 :type value: :class:`datetime.datetime`
4774 :param value: new value
4775 """
4776 if value is not None:
4777 value = _datetime_to_rfc3339(value)
4778
4779 self._patch_property("customTime", value)
4780
4781 @property
4782 def retention(self):
4783 """Retrieve the retention configuration for this object.
4784
4785 :rtype: :class:`Retention`
4786 :returns: an instance for managing the object's retention configuration.
4787 """
4788 info = self._properties.get("retention", {})
4789 return Retention.from_api_repr(info, self)
4790
4791 @property
4792 def soft_delete_time(self):
4793 """If this object has been soft-deleted, returns the time at which it became soft-deleted.
4794
4795 :rtype: :class:`datetime.datetime` or ``NoneType``
4796 :returns:
4797 (readonly) The time that the object became soft-deleted.
4798 Note this property is only set for soft-deleted objects.
4799 """
4800 soft_delete_time = self._properties.get("softDeleteTime")
4801 if soft_delete_time is not None:
4802 return _rfc3339_nanos_to_datetime(soft_delete_time)
4803
4804 @property
4805 def hard_delete_time(self):
4806 """If this object has been soft-deleted, returns the time at which it will be permanently deleted.
4807
4808 :rtype: :class:`datetime.datetime` or ``NoneType``
4809 :returns:
4810 (readonly) The time that the object will be permanently deleted.
4811 Note this property is only set for soft-deleted objects.
4812 """
4813 hard_delete_time = self._properties.get("hardDeleteTime")
4814 if hard_delete_time is not None:
4815 return _rfc3339_nanos_to_datetime(hard_delete_time)
4816
4817
4818def _get_host_name(connection):
4819 """Returns the host name from the given connection.
4820
4821 :type connection: :class:`~google.cloud.storage._http.Connection`
4822 :param connection: The connection object.
4823
4824 :rtype: str
4825 :returns: The host name.
4826 """
4827 # TODO: After google-cloud-core 1.6.0 is stable and we upgrade it
4828 # to 1.6.0 in setup.py, we no longer need to check the attribute
4829 # existence. We can simply return connection.get_api_base_url_for_mtls().
4830 return (
4831 connection.API_BASE_URL
4832 if not hasattr(connection, "get_api_base_url_for_mtls")
4833 else connection.get_api_base_url_for_mtls()
4834 )
4835
4836
4837def _get_encryption_headers(key, source=False):
4838 """Builds customer encryption key headers
4839
4840 :type key: bytes
4841 :param key: 32 byte key to build request key and hash.
4842
4843 :type source: bool
4844 :param source: If true, return headers for the "source" blob; otherwise,
4845 return headers for the "destination" blob.
4846
4847 :rtype: dict
4848 :returns: dict of HTTP headers being sent in request.
4849 """
4850 if key is None:
4851 return {}
4852
4853 key = _to_bytes(key)
4854 key_hash = hashlib.sha256(key).digest()
4855 key_hash = base64.b64encode(key_hash)
4856 key = base64.b64encode(key)
4857
4858 if source:
4859 prefix = "X-Goog-Copy-Source-Encryption-"
4860 else:
4861 prefix = "X-Goog-Encryption-"
4862
4863 return {
4864 prefix + "Algorithm": "AES256",
4865 prefix + "Key": _bytes_to_unicode(key),
4866 prefix + "Key-Sha256": _bytes_to_unicode(key_hash),
4867 }
4868
4869
4870def _quote(value, safe=b"~"):
4871 """URL-quote a string.
4872
    If the value is a text string, it is first UTF-8 encoded to bytes, and the
    bytes are then quoted, so non-ASCII characters are handled consistently
    regardless of the input type.
4877
4878 :type value: str or bytes
4879 :param value: The value to be URL-quoted.
4880
4881 :type safe: bytes
4882 :param safe: Bytes *not* to be quoted. By default, includes only ``b'~'``.
4883
4884 :rtype: str
    :returns: The URL-quoted value, as a text string.
4886 """
4887 value = _to_bytes(value, encoding="utf-8")
4888 return quote(value, safe=safe)
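
# Illustrative sketch, not part of the original module: quoting object names.
# Note that "/" is *not* in the default safe set here, unlike the stdlib
# default for ``urllib.parse.quote``.
#
#   _quote("héllo wörld.txt")            # -> 'h%C3%A9llo%20w%C3%B6rld.txt'
#   _quote("dir/name.txt")               # -> 'dir%2Fname.txt'
#   _quote("dir/name.txt", safe=b"/~")   # -> 'dir/name.txt'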
4889
4890
4891def _maybe_rewind(stream, rewind=False):
4892 """Rewind the stream if desired.
4893
4894 :type stream: IO[bytes]
4895 :param stream: A bytes IO object open for reading.
4896
4897 :type rewind: bool
4898 :param rewind: Indicates if we should seek to the beginning of the stream.
4899 """
4900 if rewind:
4901 stream.seek(0, os.SEEK_SET)
4902
4903
4904def _raise_from_invalid_response(error):
4905 """Re-wrap and raise an ``InvalidResponse`` exception.
4906
4907 :type error: :exc:`google.cloud.storage.exceptions.InvalidResponse`
    :param error: A caught exception raised by the media upload / download
                  machinery.
4910
4911 :raises: :class:`~google.cloud.exceptions.GoogleCloudError` corresponding
4912 to the failed status code
4913 """
4914 response = error.response
4915
    # 'response.text' carries the actual reason for the error, whereas 'error'
    # only carries the message about the expected status code.
4918 if response.text:
4919 error_message = response.text + ": " + str(error)
4920 else:
4921 error_message = str(error)
4922
4923 message = f"{response.request.method} {response.request.url}: {error_message}"
4924
4925 raise exceptions.from_http_status(response.status_code, message, response=response)
4926
4927
4928def _add_query_parameters(base_url, name_value_pairs):
4929 """Add one query parameter to a base URL.
4930
4931 :type base_url: string
4932 :param base_url: Base URL (may already contain query parameters)
4933
4934 :type name_value_pairs: list of (string, string) tuples.
4935 :param name_value_pairs: Names and values of the query parameters to add
4936
4937 :rtype: string
    :returns: URL with the additional query parameters appended.
4939 """
4940 if len(name_value_pairs) == 0:
4941 return base_url
4942
4943 scheme, netloc, path, query, frag = urlsplit(base_url)
4944 query = parse_qsl(query)
4945 query.extend(name_value_pairs)
4946 return urlunsplit((scheme, netloc, path, urlencode(query), frag))
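
# Illustrative sketch, not part of the original module: appending parameters
# while preserving anything already present in the query string. The URL and
# parameter names are assumed for the example.
#
#   _add_query_parameters(
#       "https://example.com/download?foo=bar",
#       [("generation", "123"), ("alt", "media")],
#   )
#   # -> 'https://example.com/download?foo=bar&generation=123&alt=media'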
4947
4948
4949class Retention(dict):
4950 """Map an object's retention configuration.
4951
4952 :type blob: :class:`Blob`
    :param blob: The blob to which this retention configuration applies.
4954
4955 :type mode: str or ``NoneType``
    :param mode:
4957 (Optional) The mode of the retention configuration, which can be either Unlocked or Locked.
4958 See: https://cloud.google.com/storage/docs/object-lock
4959
4960 :type retain_until_time: :class:`datetime.datetime` or ``NoneType``
    :param retain_until_time:
        (Optional) The earliest time that the object can be deleted or replaced, as
        determined by the retention configuration set for this object.
4964
4965 :type retention_expiration_time: :class:`datetime.datetime` or ``NoneType``
    :param retention_expiration_time:
4967 (Optional) The earliest time that the object can be deleted, which depends on any
4968 retention configuration set for the object and any retention policy set for the bucket
4969 that contains the object. This value should normally only be set by the back-end API.
4970 """
4971
4972 def __init__(
4973 self,
4974 blob,
4975 mode=None,
4976 retain_until_time=None,
4977 retention_expiration_time=None,
4978 ):
4979 data = {"mode": mode}
4980 if retain_until_time is not None:
4981 retain_until_time = _datetime_to_rfc3339(retain_until_time)
4982 data["retainUntilTime"] = retain_until_time
4983
4984 if retention_expiration_time is not None:
4985 retention_expiration_time = _datetime_to_rfc3339(retention_expiration_time)
4986 data["retentionExpirationTime"] = retention_expiration_time
4987
4988 super(Retention, self).__init__(data)
4989 self._blob = blob
4990
4991 @classmethod
4992 def from_api_repr(cls, resource, blob):
4993 """Factory: construct instance from resource.
4994
4995 :type blob: :class:`Blob`
        :param blob: The blob to which this retention configuration applies.
4997
4998 :type resource: dict
4999 :param resource: mapping as returned from API call.
5000
5001 :rtype: :class:`Retention`
5002 :returns: Retention configuration created from resource.
5003 """
5004 instance = cls(blob)
5005 instance.update(resource)
5006 return instance
5007
5008 @property
5009 def blob(self):
5010 """Blob for which this retention configuration applies to.
5011
5012 :rtype: :class:`Blob`
5013 :returns: the instance's blob.
5014 """
5015 return self._blob
5016
5017 @property
5018 def mode(self):
5019 """The mode of the retention configuration. Options are 'Unlocked' or 'Locked'.
5020
5021 :rtype: string
        :returns: The mode of the retention configuration, either 'Unlocked' or 'Locked'.
5023 """
5024 return self.get("mode")
5025
5026 @mode.setter
5027 def mode(self, value):
5028 self["mode"] = value
5029 self.blob._patch_property("retention", self)
5030
5031 @property
5032 def retain_until_time(self):
5033 """The earliest time that the object can be deleted or replaced, which is the
5034 retention configuration set for this object.
5035
5036 :rtype: :class:`datetime.datetime` or ``NoneType``
        :returns: Datetime object parsed from RFC3339 valid timestamp, or
                  ``None`` if a retention configuration is not set for the
                  object.
5040 """
5041 value = self.get("retainUntilTime")
5042 if value is not None:
5043 return _rfc3339_nanos_to_datetime(value)
5044
5045 @retain_until_time.setter
5046 def retain_until_time(self, value):
5047 """Set the retain_until_time for the object retention configuration.
5048
5049 :type value: :class:`datetime.datetime`
5050 :param value: The earliest time that the object can be deleted or replaced.
5051 """
5052 if value is not None:
5053 value = _datetime_to_rfc3339(value)
5054 self["retainUntilTime"] = value
5055 self.blob._patch_property("retention", self)
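
    # Illustrative sketch, not part of the original module: configuring object
    # retention through ``Blob.retention``. The blob and timestamp are assumed
    # for the example; each setter above stages a patch of the full
    # "retention" sub-resource, which ``blob.patch()`` then sends.
    #
    #   import datetime
    #
    #   blob.retention.mode = "Unlocked"
    #   blob.retention.retain_until_time = datetime.datetime(
    #       2026, 1, 1, tzinfo=datetime.timezone.utc
    #   )
    #   blob.patch()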
5056
5057 @property
5058 def retention_expiration_time(self):
5059 """The earliest time that the object can be deleted, which depends on any
5060 retention configuration set for the object and any retention policy set for
5061 the bucket that contains the object.
5062
5063 :rtype: :class:`datetime.datetime` or ``NoneType``
5064 :returns:
5065 (readonly) The earliest time that the object can be deleted.
5066 """
5067 retention_expiration_time = self.get("retentionExpirationTime")
5068 if retention_expiration_time is not None:
5069 return _rfc3339_nanos_to_datetime(retention_expiration_time)