1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15# pylint: disable=too-many-lines
16
17"""Create / interact with Google Cloud Storage blobs."""
18
19import base64
20import copy
21import hashlib
22from io import BytesIO
23from io import TextIOWrapper
24import logging
25import mimetypes
26import os
27import re
28from email.parser import HeaderParser
29from urllib.parse import parse_qsl
30from urllib.parse import quote
31from urllib.parse import urlencode
32from urllib.parse import urlsplit
33from urllib.parse import urlunsplit
34import warnings
35
36from google.cloud.storage._media.requests import ChunkedDownload
37from google.cloud.storage._media.requests import Download
38from google.cloud.storage._media.requests import RawDownload
39from google.cloud.storage._media.requests import RawChunkedDownload
40from google.cloud.storage._media.requests import MultipartUpload
41from google.cloud.storage._media.requests import ResumableUpload
42
43from google.api_core.iam import Policy
44from google.cloud import exceptions
45from google.cloud._helpers import _bytes_to_unicode
46from google.cloud._helpers import _datetime_to_rfc3339
47from google.cloud._helpers import _rfc3339_nanos_to_datetime
48from google.cloud._helpers import _to_bytes
49from google.cloud.exceptions import NotFound
50from google.cloud.storage._opentelemetry_tracing import (
51 _get_opentelemetry_attributes_from_url,
52)
53from google.cloud.storage._helpers import _add_etag_match_headers
54from google.cloud.storage._helpers import _add_generation_match_parameters
55from google.cloud.storage._helpers import _PropertyMixin
56from google.cloud.storage._helpers import _scalar_property
57from google.cloud.storage._helpers import _bucket_bound_hostname_url
58from google.cloud.storage._helpers import _raise_if_more_than_one_set
59from google.cloud.storage._helpers import _get_default_headers
60from google.cloud.storage._helpers import _get_default_storage_base_url
61from google.cloud.storage._signing import generate_signed_url_v2
62from google.cloud.storage._signing import generate_signed_url_v4
63from google.cloud.storage._helpers import _API_VERSION
64from google.cloud.storage._helpers import _virtual_hosted_style_base_url
65from google.cloud.storage._opentelemetry_tracing import create_trace_span
66from google.cloud.storage.acl import ACL
67from google.cloud.storage.acl import ObjectACL
68from google.cloud.storage.constants import _DEFAULT_TIMEOUT
69from google.cloud.storage.constants import ARCHIVE_STORAGE_CLASS
70from google.cloud.storage.constants import COLDLINE_STORAGE_CLASS
71from google.cloud.storage.constants import MULTI_REGIONAL_LEGACY_STORAGE_CLASS
72from google.cloud.storage.constants import NEARLINE_STORAGE_CLASS
73from google.cloud.storage.constants import REGIONAL_LEGACY_STORAGE_CLASS
74from google.cloud.storage.constants import STANDARD_STORAGE_CLASS
75from google.cloud.storage.exceptions import DataCorruption
76from google.cloud.storage.exceptions import InvalidResponse
77from google.cloud.storage.retry import ConditionalRetryPolicy
78from google.cloud.storage.retry import DEFAULT_RETRY
79from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON
80from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED
81from google.cloud.storage.fileio import BlobReader
82from google.cloud.storage.fileio import BlobWriter
83
84
_DEFAULT_CONTENT_TYPE = "application/octet-stream"
_DOWNLOAD_URL_TEMPLATE = "{hostname}/download/storage/{api_version}{path}?alt=media"
_BASE_UPLOAD_TEMPLATE = (
    "{hostname}/upload/storage/{api_version}{bucket_path}/o?uploadType="
)
_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"
# NOTE: "acl" is also writeable but we defer ACL management to
# the classes in the google.cloud.storage.acl module.
_CONTENT_TYPE_FIELD = "contentType"
_WRITABLE_FIELDS = (
    "cacheControl",
    "contentDisposition",
    "contentEncoding",
    "contentLanguage",
    _CONTENT_TYPE_FIELD,
    "crc32c",
    "customTime",
    "md5Hash",
    "metadata",
    "name",
    "retention",
    "storageClass",
)
_READ_LESS_THAN_SIZE = (
    "Size {:d} was specified but the file-like object only had {:d} bytes remaining."
)
_CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE = (
    "A checksum of type `{}` was requested, but checksumming is not available "
    "for downloads when chunk_size is set."
)
_COMPOSE_IF_GENERATION_LIST_DEPRECATED = (
    "'if_generation_match: type list' is deprecated and supported for "
    "backwards-compatibility reasons only. Use 'if_source_generation_match' "
    "instead to match source objects' generations."
)
_COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR = (
    "Use 'if_generation_match' to match the generation of the destination "
    "object by passing in a generation number, instead of a list. "
    "Use 'if_source_generation_match' to match source objects generations."
)
_COMPOSE_IF_METAGENERATION_LIST_DEPRECATED = (
    "'if_metageneration_match: type list' is deprecated and supported for "
    "backwards-compatibility reasons only. Note that the metageneration to "
    "be matched is that of the destination blob. Please pass in a single "
    "value (type long)."
)
_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR = (
    "'if_source_generation_match' length must be the same as 'sources' length"
)
_DOWNLOAD_AS_STRING_DEPRECATED = (
    "Blob.download_as_string() is deprecated and will be removed in future. "
    "Use Blob.download_as_bytes() instead."
)
_FROM_STRING_DEPRECATED = (
    "Blob.from_string() is deprecated and will be removed in future. "
    "Use Blob.from_uri() instead."
)
# Matches "gs://<bucket>/<object>"; both named groups are required, so a
# bucket-only URI (no object name) does not match.
_GS_URL_REGEX_PATTERN = re.compile(
    r"(?P<scheme>gs)://(?P<bucket_name>[a-z0-9_.-]+)/(?P<object_name>.+)"
)

_DEFAULT_CHUNKSIZE = 104857600  # 1024 * 1024 B * 100 = 100 MiB
_MAX_MULTIPART_SIZE = 8388608  # 8 MiB

_logger = logging.getLogger(__name__)
151
152
153class Blob(_PropertyMixin):
154 """A wrapper around Cloud Storage's concept of an ``Object``.
155
156 :type name: str
157 :param name: The name of the blob. This corresponds to the unique path of
158 the object in the bucket. If bytes, will be converted to a
159 unicode object. Blob / object names can contain any sequence
160 of valid unicode characters, of length 1-1024 bytes when
161 UTF-8 encoded.
162
163 :type bucket: :class:`google.cloud.storage.bucket.Bucket`
164 :param bucket: The bucket to which this blob belongs.
165
166 :type chunk_size: int
167 :param chunk_size:
168 (Optional) The size of a chunk of data whenever iterating (in bytes).
169 This must be a multiple of 256 KB per the API specification. If not
        specified, the chunk_size of the blob itself is used. If that is not
        specified, a default value of 100 MB (``_DEFAULT_CHUNKSIZE``) is used.
172
173 :type encryption_key: bytes
174 :param encryption_key:
175 (Optional) 32 byte encryption key for customer-supplied encryption.
176 See https://cloud.google.com/storage/docs/encryption#customer-supplied.
177
178 :type kms_key_name: str
179 :param kms_key_name:
180 (Optional) Resource name of Cloud KMS key used to encrypt the blob's
181 contents.
182
183 :type generation: long
184 :param generation:
185 (Optional) If present, selects a specific revision of this object.
186 """
187
    # Per-instance transfer chunk size; ``None`` means "not configured" and
    # is a valid value for the ``chunk_size`` setter.
    _chunk_size = None  # Default value for each instance.
    _CHUNK_SIZE_MULTIPLE = 256 * 1024
    """Number (256 KB, in bytes) that must divide the chunk size."""

    STORAGE_CLASSES = (
        STANDARD_STORAGE_CLASS,
        NEARLINE_STORAGE_CLASS,
        COLDLINE_STORAGE_CLASS,
        ARCHIVE_STORAGE_CLASS,
        MULTI_REGIONAL_LEGACY_STORAGE_CLASS,
        REGIONAL_LEGACY_STORAGE_CLASS,
    )
    """Allowed values for :attr:`storage_class`.

    See
    https://cloud.google.com/storage/docs/json_api/v1/objects#storageClass
    https://cloud.google.com/storage/docs/per-object-storage-class

    .. note::
        This list does not include 'DURABLE_REDUCED_AVAILABILITY', which
        is only documented for buckets (and deprecated).
    """
210
211 def __init__(
212 self,
213 name,
214 bucket,
215 chunk_size=None,
216 encryption_key=None,
217 kms_key_name=None,
218 generation=None,
219 ):
220 """
221 property :attr:`name`
222 Get the blob's name.
223 """
224 name = _bytes_to_unicode(name)
225 super(Blob, self).__init__(name=name)
226
227 self.chunk_size = chunk_size # Check that setter accepts value.
228 self._bucket = bucket
229 self._acl = ObjectACL(self)
230 _raise_if_more_than_one_set(
231 encryption_key=encryption_key, kms_key_name=kms_key_name
232 )
233
234 self._encryption_key = encryption_key
235
236 if kms_key_name is not None:
237 self._properties["kmsKeyName"] = kms_key_name
238
239 if generation is not None:
240 self._properties["generation"] = generation
241
242 @property
243 def bucket(self):
244 """Bucket which contains the object.
245
246 :rtype: :class:`~google.cloud.storage.bucket.Bucket`
247 :returns: The object's bucket.
248 """
249 return self._bucket
250
251 @property
252 def chunk_size(self):
253 """Get the blob's default chunk size.
254
255 :rtype: int or ``NoneType``
256 :returns: The current blob's chunk size, if it is set.
257 """
258 return self._chunk_size
259
260 @chunk_size.setter
261 def chunk_size(self, value):
262 """Set the blob's default chunk size.
263
264 :type value: int
265 :param value: (Optional) The current blob's chunk size, if it is set.
266
267 :raises: :class:`ValueError` if ``value`` is not ``None`` and is not a
268 multiple of 256 KB.
269 """
270 if value is not None and value > 0 and value % self._CHUNK_SIZE_MULTIPLE != 0:
271 raise ValueError(
272 "Chunk size must be a multiple of %d." % (self._CHUNK_SIZE_MULTIPLE,)
273 )
274 self._chunk_size = value
275
276 @property
277 def encryption_key(self):
278 """Retrieve the customer-supplied encryption key for the object.
279
280 :rtype: bytes or ``NoneType``
281 :returns:
282 The encryption key or ``None`` if no customer-supplied encryption key was used,
283 or the blob's resource has not been loaded from the server.
284 """
285 return self._encryption_key
286
287 @encryption_key.setter
288 def encryption_key(self, value):
289 """Set the blob's encryption key.
290
291 See https://cloud.google.com/storage/docs/encryption#customer-supplied
292
293 To perform a key rotation for an encrypted blob, use :meth:`rewrite`.
294 See https://cloud.google.com/storage/docs/encryption/using-customer-supplied-keys?hl=ca#rotating
295
296 :type value: bytes
297 :param value: 32 byte encryption key for customer-supplied encryption.
298 """
299 self._encryption_key = value
300
301 @staticmethod
302 def path_helper(bucket_path, blob_name):
303 """Relative URL path for a blob.
304
305 :type bucket_path: str
306 :param bucket_path: The URL path for a bucket.
307
308 :type blob_name: str
309 :param blob_name: The name of the blob.
310
311 :rtype: str
312 :returns: The relative URL path for ``blob_name``.
313 """
314 return bucket_path + "/o/" + _quote(blob_name)
315
316 @property
317 def acl(self):
318 """Create our ACL on demand."""
319 return self._acl
320
321 def __repr__(self):
322 if self.bucket:
323 bucket_name = self.bucket.name
324 else:
325 bucket_name = None
326
327 return f"<Blob: {bucket_name}, {self.name}, {self.generation}>"
328
329 @property
330 def path(self):
331 """Getter property for the URL path to this Blob.
332
333 :rtype: str
334 :returns: The URL path to this Blob.
335 """
336 if not self.name:
337 raise ValueError("Cannot determine path without a blob name.")
338
339 return self.path_helper(self.bucket.path, self.name)
340
341 @property
342 def client(self):
343 """The client bound to this blob."""
344 return self.bucket.client
345
346 @property
347 def user_project(self):
348 """Project ID billed for API requests made via this blob.
349
350 Derived from bucket's value.
351
352 :rtype: str
353 """
354 return self.bucket.user_project
355
356 def _encryption_headers(self):
357 """Return any encryption headers needed to fetch the object.
358
359 :rtype: List(Tuple(str, str))
360 :returns: a list of tuples to be passed as headers.
361 """
362 return _get_encryption_headers(self._encryption_key)
363
364 @property
365 def _query_params(self):
366 """Default query parameters."""
367 params = {}
368 if self.generation is not None:
369 params["generation"] = self.generation
370 if self.user_project is not None:
371 params["userProject"] = self.user_project
372 return params
373
374 @property
375 def public_url(self):
376 """The public URL for this blob.
377
378 Use :meth:`make_public` to enable anonymous access via the returned
379 URL.
380
381 :rtype: `string`
382 :returns: The public URL for this blob.
383 """
384 if self.client:
385 endpoint = self.client.api_endpoint
386 else:
387 endpoint = _get_default_storage_base_url()
388 return "{storage_base_url}/{bucket_name}/{quoted_name}".format(
389 storage_base_url=endpoint,
390 bucket_name=self.bucket.name,
391 quoted_name=_quote(self.name, safe=b"/~"),
392 )
393
394 @classmethod
395 def from_uri(cls, uri, client=None):
396 """Get a constructor for blob object by URI.
397
398 .. code-block:: python
399
400 from google.cloud import storage
401 from google.cloud.storage.blob import Blob
402 client = storage.Client()
403 blob = Blob.from_uri("gs://bucket/object", client=client)
404
405 :type uri: str
406 :param uri: The blob uri following a gs://bucket/object pattern.
407 Both a bucket and object name is required to construct a blob object.
408
409 :type client: :class:`~google.cloud.storage.client.Client`
410 :param client:
411 (Optional) The client to use. Application code should
412 *always* pass ``client``.
413
414 :rtype: :class:`google.cloud.storage.blob.Blob`
415 :returns: The blob object created.
416 """
417 from google.cloud.storage.bucket import Bucket
418
419 match = _GS_URL_REGEX_PATTERN.match(uri)
420 if not match:
421 raise ValueError("URI pattern must be gs://bucket/object")
422 bucket = Bucket(client, name=match.group("bucket_name"))
423 return cls(match.group("object_name"), bucket)
424
425 @classmethod
426 def from_string(cls, uri, client=None):
427 """(Deprecated) Get a constructor for blob object by URI.
428
429 .. note::
430 Deprecated alias for :meth:`from_uri`.
431
432 .. code-block:: python
433
434 from google.cloud import storage
435 from google.cloud.storage.blob import Blob
436 client = storage.Client()
437 blob = Blob.from_string("gs://bucket/object", client=client)
438
439 :type uri: str
440 :param uri: The blob uri following a gs://bucket/object pattern.
441 Both a bucket and object name is required to construct a blob object.
442
443 :type client: :class:`~google.cloud.storage.client.Client`
444 :param client:
445 (Optional) The client to use. Application code should
446 *always* pass ``client``.
447
448 :rtype: :class:`google.cloud.storage.blob.Blob`
449 :returns: The blob object created.
450 """
451 warnings.warn(_FROM_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2)
452 return Blob.from_uri(uri=uri, client=client)
453
    def generate_signed_url(
        self,
        expiration=None,
        api_access_endpoint=None,
        method="GET",
        content_md5=None,
        content_type=None,
        response_disposition=None,
        response_type=None,
        generation=None,
        headers=None,
        query_parameters=None,
        client=None,
        credentials=None,
        version=None,
        service_account_email=None,
        access_token=None,
        virtual_hosted_style=False,
        bucket_bound_hostname=None,
        scheme="http",
    ):
        """Generates a signed URL for this blob.

        .. note::

            If you are on Google Compute Engine, you can't generate a signed
            URL using GCE service account.
            If you'd like to be able to generate a signed URL from GCE,
            you can use a standard service account from a JSON file rather
            than a GCE service account.

        If you have a blob that you want to allow access to for a set
        amount of time, you can use this method to generate a URL that
        is only valid within a certain time period.

        See a [code sample](https://cloud.google.com/storage/docs/samples/storage-generate-signed-url-v4#storage_generate_signed_url_v4-python).

        This is particularly useful if you don't want publicly
        accessible blobs, but don't want to require users to explicitly
        log in.

        If ``bucket_bound_hostname`` is set as an argument of :attr:`api_access_endpoint`,
        ``https`` works only if using a ``CDN``.

        :type expiration: Union[Integer, datetime.datetime, datetime.timedelta]
        :param expiration:
            Point in time when the signed URL should expire. If a ``datetime``
            instance is passed without an explicit ``tzinfo`` set, it will be
            assumed to be ``UTC``.

        :type api_access_endpoint: str
        :param api_access_endpoint: (Optional) URI base, for instance
            "https://storage.googleapis.com". If not specified, the client's
            api_endpoint will be used. Incompatible with bucket_bound_hostname.

        :type method: str
        :param method: The HTTP verb that will be used when requesting the URL.

        :type content_md5: str
        :param content_md5:
            (Optional) The MD5 hash of the object referenced by ``resource``.

        :type content_type: str
        :param content_type:
            (Optional) The content type of the object referenced by
            ``resource``.

        :type response_disposition: str
        :param response_disposition:
            (Optional) Content disposition of responses to requests for the
            signed URL. For example, to enable the signed URL to initiate a
            file of ``blog.png``, use the value ``'attachment;
            filename=blob.png'``.

        :type response_type: str
        :param response_type:
            (Optional) Content type of responses to requests for the signed
            URL. Ignored if content_type is set on object/blob metadata.

        :type generation: str
        :param generation:
            (Optional) A value that indicates which generation of the resource
            to fetch.

        :type headers: dict
        :param headers:
            (Optional) Additional HTTP headers to be included as part of the
            signed URLs. See:
            https://cloud.google.com/storage/docs/xml-api/reference-headers
            Requests using the signed URL *must* pass the specified header
            (name and value) with each request for the URL.

        :type query_parameters: dict
        :param query_parameters:
            (Optional) Additional query parameters to be included as part of the
            signed URLs. See:
            https://cloud.google.com/storage/docs/xml-api/reference-headers#query

        :type client: :class:`~google.cloud.storage.client.Client`
        :param client:
            (Optional) The client to use. If not passed, falls back to the
            ``client`` stored on the blob's bucket.

        :type credentials: :class:`google.auth.credentials.Credentials`
        :param credentials:
            (Optional) The authorization credentials to attach to requests.
            These credentials identify this application to the service. If
            none are specified, the client will attempt to ascertain the
            credentials from the environment.

        :type version: str
        :param version:
            (Optional) The version of signed credential to create. Must be one
            of 'v2' | 'v4'. Defaults to 'v2' when not given.

        :type service_account_email: str
        :param service_account_email:
            (Optional) E-mail address of the service account.

        :type access_token: str
        :param access_token: (Optional) Access token for a service account.

        :type virtual_hosted_style: bool
        :param virtual_hosted_style:
            (Optional) If true, then construct the URL relative the bucket's
            virtual hostname, e.g., '<bucket-name>.storage.googleapis.com'.
            Incompatible with bucket_bound_hostname.

        :type bucket_bound_hostname: str
        :param bucket_bound_hostname:
            (Optional) If passed, then construct the URL relative to the bucket-bound hostname.
            Value can be a bare or with scheme, e.g., 'example.com' or 'http://example.com'.
            Incompatible with api_access_endpoint and virtual_hosted_style.
            See: https://cloud.google.com/storage/docs/request-endpoints#cname

        :type scheme: str
        :param scheme:
            (Optional) If ``bucket_bound_hostname`` is passed as a bare
            hostname, use this value as the scheme. ``https`` will work only
            when using a CDN. Defaults to ``"http"``.

        :raises: :exc:`ValueError` when version is invalid or mutually exclusive arguments are used.
        :raises: :exc:`TypeError` when expiration is not a valid type.
        :raises: :exc:`AttributeError` if credentials is not an instance
                 of :class:`google.auth.credentials.Signing`.

        :rtype: str
        :returns: A signed URL you can use to access the resource
                  until expiration.
        """
        # Normalize / validate the signing version first.
        if version is None:
            version = "v2"
        elif version not in ("v2", "v4"):
            raise ValueError("'version' must be either 'v2' or 'v4'")

        # bucket_bound_hostname supplies its own endpoint, so it cannot be
        # combined with an explicit endpoint or virtual-hosted style.
        if (
            api_access_endpoint is not None or virtual_hosted_style
        ) and bucket_bound_hostname:
            raise ValueError(
                "The bucket_bound_hostname argument is not compatible with "
                "either api_access_endpoint or virtual_hosted_style."
            )

        if api_access_endpoint is None:
            client = self._require_client(client)
            api_access_endpoint = client.api_endpoint

        quoted_name = _quote(self.name, safe=b"/~")

        # If you are on Google Compute Engine, you can't generate a signed URL
        # using GCE service account.
        # See https://github.com/googleapis/google-auth-library-python/issues/50
        # The three endpoint styles differ in whether the bucket name lives in
        # the hostname (virtual-hosted / bucket-bound) or in the resource path.
        if virtual_hosted_style:
            api_access_endpoint = _virtual_hosted_style_base_url(
                api_access_endpoint, self.bucket.name
            )
            resource = f"/{quoted_name}"
        elif bucket_bound_hostname:
            api_access_endpoint = _bucket_bound_hostname_url(
                bucket_bound_hostname, scheme
            )
            resource = f"/{quoted_name}"
        else:
            resource = f"/{self.bucket.name}/{quoted_name}"

        # Fall back to the client's credentials when none were passed.
        if credentials is None:
            client = self._require_client(client)  # May be redundant, but that's ok.
            credentials = client._credentials

        client = self._require_client(client)
        universe_domain = client.universe_domain

        if version == "v2":
            helper = generate_signed_url_v2
        else:
            helper = generate_signed_url_v4

        # Customer-supplied encryption keys require extra headers in the
        # signature; v2 only signs the algorithm header.
        if self._encryption_key is not None:
            encryption_headers = _get_encryption_headers(self._encryption_key)
            if headers is None:
                headers = {}
            if version == "v2":
                # See: https://cloud.google.com/storage/docs/access-control/signed-urls-v2#about-canonical-extension-headers
                v2_copy_only = "X-Goog-Encryption-Algorithm"
                headers[v2_copy_only] = encryption_headers[v2_copy_only]
            else:
                headers.update(encryption_headers)

        return helper(
            credentials,
            resource=resource,
            expiration=expiration,
            api_access_endpoint=api_access_endpoint,
            method=method.upper(),
            content_md5=content_md5,
            content_type=content_type,
            response_type=response_type,
            response_disposition=response_disposition,
            generation=generation,
            headers=headers,
            query_parameters=query_parameters,
            service_account_email=service_account_email,
            access_token=access_token,
            universe_domain=universe_domain,
        )
679
680 def exists(
681 self,
682 client=None,
683 if_etag_match=None,
684 if_etag_not_match=None,
685 if_generation_match=None,
686 if_generation_not_match=None,
687 if_metageneration_match=None,
688 if_metageneration_not_match=None,
689 timeout=_DEFAULT_TIMEOUT,
690 retry=DEFAULT_RETRY,
691 soft_deleted=None,
692 ):
693 """Determines whether or not this blob exists.
694
695 If :attr:`user_project` is set on the bucket, bills the API request
696 to that project.
697
698 :type client: :class:`~google.cloud.storage.client.Client`
699 :param client:
700 (Optional) The client to use. If not passed, falls back to the
701 ``client`` stored on the blob's bucket.
702
703 :type if_etag_match: Union[str, Set[str]]
704 :param if_etag_match:
705 (Optional) See :ref:`using-if-etag-match`
706
707 :type if_etag_not_match: Union[str, Set[str]]
708 :param if_etag_not_match:
709 (Optional) See :ref:`using-if-etag-not-match`
710
711 :type if_generation_match: long
712 :param if_generation_match:
713 (Optional) See :ref:`using-if-generation-match`
714
715 :type if_generation_not_match: long
716 :param if_generation_not_match:
717 (Optional) See :ref:`using-if-generation-not-match`
718
719 :type if_metageneration_match: long
720 :param if_metageneration_match:
721 (Optional) See :ref:`using-if-metageneration-match`
722
723 :type if_metageneration_not_match: long
724 :param if_metageneration_not_match:
725 (Optional) See :ref:`using-if-metageneration-not-match`
726
727 :type timeout: float or tuple
728 :param timeout:
729 (Optional) The amount of time, in seconds, to wait
730 for the server response. See: :ref:`configuring_timeouts`
731
732 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
733 :param retry:
734 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
735
736 :type soft_deleted: bool
737 :param soft_deleted:
738 (Optional) If True, looks for a soft-deleted object. Will only return True
739 if the object exists and is in a soft-deleted state.
740 :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
741 See: https://cloud.google.com/storage/docs/soft-delete
742
743 :rtype: bool
744 :returns: True if the blob exists in Cloud Storage.
745 """
746 with create_trace_span(name="Storage.Blob.exists"):
747 client = self._require_client(client)
748 # We only need the status code (200 or not) so we seek to
749 # minimize the returned payload.
750 query_params = self._query_params
751 query_params["fields"] = "name"
752 if soft_deleted is not None:
753 query_params["softDeleted"] = soft_deleted
754
755 _add_generation_match_parameters(
756 query_params,
757 if_generation_match=if_generation_match,
758 if_generation_not_match=if_generation_not_match,
759 if_metageneration_match=if_metageneration_match,
760 if_metageneration_not_match=if_metageneration_not_match,
761 )
762
763 headers = {}
764 _add_etag_match_headers(
765 headers,
766 if_etag_match=if_etag_match,
767 if_etag_not_match=if_etag_not_match,
768 )
769
770 try:
771 # We intentionally pass `_target_object=None` since fields=name
772 # would limit the local properties.
773 client._get_resource(
774 self.path,
775 query_params=query_params,
776 headers=headers,
777 timeout=timeout,
778 retry=retry,
779 _target_object=None,
780 )
781 except NotFound:
782 # NOTE: This will not fail immediately in a batch. However, when
783 # Batch.finish() is called, the resulting `NotFound` will be
784 # raised.
785 return False
786 return True
787
788 def delete(
789 self,
790 client=None,
791 if_generation_match=None,
792 if_generation_not_match=None,
793 if_metageneration_match=None,
794 if_metageneration_not_match=None,
795 timeout=_DEFAULT_TIMEOUT,
796 retry=DEFAULT_RETRY,
797 ):
798 """Deletes a blob from Cloud Storage.
799
800 If :attr:`user_project` is set on the bucket, bills the API request
801 to that project.
802
803 :type client: :class:`~google.cloud.storage.client.Client`
804 :param client:
805 (Optional) The client to use. If not passed, falls back to the
806 ``client`` stored on the blob's bucket.
807
808 :type if_generation_match: long
809 :param if_generation_match:
810 (Optional) See :ref:`using-if-generation-match`
811
812 :type if_generation_not_match: long
813 :param if_generation_not_match:
814 (Optional) See :ref:`using-if-generation-not-match`
815
816 :type if_metageneration_match: long
817 :param if_metageneration_match:
818 (Optional) See :ref:`using-if-metageneration-match`
819
820 :type if_metageneration_not_match: long
821 :param if_metageneration_not_match:
822 (Optional) See :ref:`using-if-metageneration-not-match`
823
824 :type timeout: float or tuple
825 :param timeout:
826 (Optional) The amount of time, in seconds, to wait
827 for the server response. See: :ref:`configuring_timeouts`
828
829 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
830 :param retry: (Optional) How to retry the RPC. A None value will disable
831 retries. A google.api_core.retry.Retry value will enable retries,
832 and the object will define retriable response codes and errors and
833 configure backoff and timeout options.
834
835 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
836 Retry object and activates it only if certain conditions are met.
837 This class exists to provide safe defaults for RPC calls that are
838 not technically safe to retry normally (due to potential data
839 duplication or other side-effects) but become safe to retry if a
840 condition such as if_generation_match is set.
841
842 See the retry.py source code and docstrings in this package
843 (google.cloud.storage.retry) for information on retry types and how
844 to configure them.
845
846 :raises: :class:`google.cloud.exceptions.NotFound`
847 (propagated from
848 :meth:`google.cloud.storage.bucket.Bucket.delete_blob`).
849 """
850 with create_trace_span(name="Storage.Blob.delete"):
851 self.bucket.delete_blob(
852 self.name,
853 client=client,
854 generation=self.generation,
855 timeout=timeout,
856 if_generation_match=if_generation_match,
857 if_generation_not_match=if_generation_not_match,
858 if_metageneration_match=if_metageneration_match,
859 if_metageneration_not_match=if_metageneration_not_match,
860 retry=retry,
861 )
862
863 def _get_transport(self, client):
864 """Return the client's transport.
865
866 :type client: :class:`~google.cloud.storage.client.Client`
867 :param client:
868 (Optional) The client to use. If not passed, falls back to the
869 ``client`` stored on the blob's bucket.
870
871 :rtype transport:
872 :class:`~google.auth.transport.requests.AuthorizedSession`
873 :returns: The transport (with credentials) that will
874 make authenticated requests.
875 """
876 client = self._require_client(client)
877 return client._http
878
879 def _get_download_url(
880 self,
881 client,
882 if_generation_match=None,
883 if_generation_not_match=None,
884 if_metageneration_match=None,
885 if_metageneration_not_match=None,
886 ):
887 """Get the download URL for the current blob.
888
889 If the ``media_link`` has been loaded, it will be used, otherwise
890 the URL will be constructed from the current blob's path (and possibly
891 generation) to avoid a round trip.
892
893 :type client: :class:`~google.cloud.storage.client.Client`
894 :param client: The client to use.
895
896 :type if_generation_match: long
897 :param if_generation_match:
898 (Optional) See :ref:`using-if-generation-match`
899
900 :type if_generation_not_match: long
901 :param if_generation_not_match:
902 (Optional) See :ref:`using-if-generation-not-match`
903
904 :type if_metageneration_match: long
905 :param if_metageneration_match:
906 (Optional) See :ref:`using-if-metageneration-match`
907
908 :type if_metageneration_not_match: long
909 :param if_metageneration_not_match:
910 (Optional) See :ref:`using-if-metageneration-not-match`
911
912 :rtype: str
913 :returns: The download URL for the current blob.
914 """
915 name_value_pairs = []
916 if self.media_link is None:
917 hostname = _get_host_name(client._connection)
918 base_url = _DOWNLOAD_URL_TEMPLATE.format(
919 hostname=hostname, path=self.path, api_version=_API_VERSION
920 )
921 if self.generation is not None:
922 name_value_pairs.append(("generation", f"{self.generation:d}"))
923 else:
924 base_url = self.media_link
925
926 if self.user_project is not None:
927 name_value_pairs.append(("userProject", self.user_project))
928
929 _add_generation_match_parameters(
930 name_value_pairs,
931 if_generation_match=if_generation_match,
932 if_generation_not_match=if_generation_not_match,
933 if_metageneration_match=if_metageneration_match,
934 if_metageneration_not_match=if_metageneration_not_match,
935 )
936 return _add_query_parameters(base_url, name_value_pairs)
937
938 def _extract_headers_from_download(self, response):
939 """Extract headers from a non-chunked request's http object.
940
941 This avoids the need to make a second request for commonly used
942 headers.
943
944 :type response:
945 :class requests.models.Response
946 :param response: The server response from downloading a non-chunked file
947 """
948 self._properties["contentEncoding"] = response.headers.get(
949 "Content-Encoding", None
950 )
951 self._properties[_CONTENT_TYPE_FIELD] = response.headers.get(
952 "Content-Type", None
953 )
954 self._properties["cacheControl"] = response.headers.get("Cache-Control", None)
955 self._properties["storageClass"] = response.headers.get(
956 "X-Goog-Storage-Class", None
957 )
958 self._properties["contentLanguage"] = response.headers.get(
959 "Content-Language", None
960 )
961 self._properties["etag"] = response.headers.get("ETag", None)
962 self._properties["generation"] = response.headers.get("X-goog-generation", None)
963 self._properties["metageneration"] = response.headers.get(
964 "X-goog-metageneration", None
965 )
966 # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
967 x_goog_hash = response.headers.get("X-Goog-Hash", "")
968
969 if x_goog_hash:
970 digests = {}
971 for encoded_digest in x_goog_hash.split(","):
972 match = re.match(r"(crc32c|md5)=([\w\d/\+/]+={0,3})", encoded_digest)
973 if match:
974 method, digest = match.groups()
975 digests[method] = digest
976
977 self._properties["crc32c"] = digests.get("crc32c", None)
978 self._properties["md5Hash"] = digests.get("md5", None)
979
980 def _do_download(
981 self,
982 transport,
983 file_obj,
984 download_url,
985 headers,
986 start=None,
987 end=None,
988 raw_download=False,
989 timeout=_DEFAULT_TIMEOUT,
990 checksum="auto",
991 retry=DEFAULT_RETRY,
992 single_shot_download=False,
993 ):
994 """Perform a download without any error handling.
995
996 This is intended to be called by :meth:`_prep_and_do_download` so it can
997 be wrapped with error handling / remapping.
998
999 :type transport:
1000 :class:`~google.auth.transport.requests.AuthorizedSession`
1001 :param transport:
1002 The transport (with credentials) that will make authenticated
1003 requests.
1004
1005 :type file_obj: file
1006 :param file_obj: A file handle to which to write the blob's data.
1007
1008 :type download_url: str
1009 :param download_url: The URL where the media can be accessed.
1010
1011 :type headers: dict
1012 :param headers: Headers to be sent with the request(s).
1013
1014 :type start: int
1015 :param start: (Optional) The first byte in a range to be downloaded.
1016
1017 :type end: int
1018 :param end: (Optional) The last byte in a range to be downloaded.
1019
1020 :type raw_download: bool
1021 :param raw_download:
1022 (Optional) If true, download the object without any expansion.
1023
1024 :type timeout: float or tuple
1025 :param timeout:
1026 (Optional) The amount of time, in seconds, to wait
1027 for the server response. See: :ref:`configuring_timeouts`
1028
1029 :type checksum: str
1030 :param checksum:
1031 (Optional) The type of checksum to compute to verify the integrity
1032 of the object. The response headers must contain a checksum of the
1033 requested type. If the headers lack an appropriate checksum (for
1034 instance in the case of transcoded or ranged downloads where the
1035 remote service does not know the correct checksum, including
1036 downloads where chunk_size is set) an INFO-level log will be
1037 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1038 default is "auto", which will try to detect if the C extension for
1039 crc32c is installed and fall back to md5 otherwise.
1040
1041 :type retry: google.api_core.retry.Retry
1042 :param retry: (Optional) How to retry the RPC. A None value will disable
1043 retries. A google.api_core.retry.Retry value will enable retries,
1044 and the object will configure backoff and timeout options.
1045
1046 This private method does not accept ConditionalRetryPolicy values
1047 because the information necessary to evaluate the policy is instead
1048 evaluated in blob._prep_and_do_download().
1049
1050 See the retry.py source code and docstrings in this package
1051 (google.cloud.storage.retry) for information on retry types and how
1052 to configure them.
1053
1054 :type single_shot_download: bool
1055 :param single_shot_download:
1056 (Optional) If true, download the object in a single request.
1057 Caution: Enabling this will increase the memory overload for your application.
1058 Please enable this as per your use case.
1059 """
1060
1061 extra_attributes = _get_opentelemetry_attributes_from_url(download_url)
1062 extra_attributes["download.chunk_size"] = f"{self.chunk_size}"
1063 extra_attributes["download.raw_download"] = raw_download
1064 extra_attributes["upload.checksum"] = f"{checksum}"
1065 extra_attributes["download.single_shot_download"] = single_shot_download
1066 args = {"timeout": timeout}
1067
1068 if self.chunk_size is None:
1069 if raw_download:
1070 klass = RawDownload
1071 download_class = "RawDownload"
1072 else:
1073 klass = Download
1074 download_class = "Download"
1075
1076 download = klass(
1077 download_url,
1078 stream=file_obj,
1079 headers=headers,
1080 start=start,
1081 end=end,
1082 checksum=checksum,
1083 retry=retry,
1084 # NOTE: single_shot_download is only supported in Download and RawDownload
1085 # classes, i.e., when chunk_size is set to None (the default value). It is
1086 # not supported for chunked downloads.
1087 single_shot_download=single_shot_download,
1088 )
1089 with create_trace_span(
1090 name=f"Storage.{download_class}/consume",
1091 attributes=extra_attributes,
1092 api_request=args,
1093 ):
1094 response = download.consume(transport, timeout=timeout)
1095 self._extract_headers_from_download(response)
1096 else:
1097 if checksum:
1098 msg = _CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE.format(checksum)
1099 _logger.info(msg)
1100
1101 if raw_download:
1102 klass = RawChunkedDownload
1103 download_class = "RawChunkedDownload"
1104 else:
1105 klass = ChunkedDownload
1106 download_class = "ChunkedDownload"
1107
1108 download = klass(
1109 download_url,
1110 self.chunk_size,
1111 file_obj,
1112 headers=headers,
1113 start=start if start else 0,
1114 end=end,
1115 retry=retry,
1116 )
1117
1118 with create_trace_span(
1119 name=f"Storage.{download_class}/consumeNextChunk",
1120 attributes=extra_attributes,
1121 api_request=args,
1122 ):
1123 while not download.finished:
1124 download.consume_next_chunk(transport, timeout=timeout)
1125
1126 def download_to_file(
1127 self,
1128 file_obj,
1129 client=None,
1130 start=None,
1131 end=None,
1132 raw_download=False,
1133 if_etag_match=None,
1134 if_etag_not_match=None,
1135 if_generation_match=None,
1136 if_generation_not_match=None,
1137 if_metageneration_match=None,
1138 if_metageneration_not_match=None,
1139 timeout=_DEFAULT_TIMEOUT,
1140 checksum="auto",
1141 retry=DEFAULT_RETRY,
1142 single_shot_download=False,
1143 ):
1144 """Download the contents of this blob into a file-like object.
1145
1146 .. note::
1147
1148 If the server-set property, :attr:`media_link`, is not yet
1149 initialized, makes an additional API request to load it.
1150
1151 If the :attr:`chunk_size` of a current blob is `None`, will download data
1152 in single download request otherwise it will download the :attr:`chunk_size`
1153 of data in each request.
1154
1155 For more fine-grained control over the download process, check out
1156 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
1157 For example, this library allows downloading **parts** of a blob rather than the whole thing.
1158
1159 If :attr:`user_project` is set on the bucket, bills the API request
1160 to that project.
1161
1162 :type file_obj: file
1163 :param file_obj: A file handle to which to write the blob's data.
1164
1165 :type client: :class:`~google.cloud.storage.client.Client`
1166 :param client:
1167 (Optional) The client to use. If not passed, falls back to the
1168 ``client`` stored on the blob's bucket.
1169
1170 :type start: int
1171 :param start: (Optional) The first byte in a range to be downloaded.
1172
1173 :type end: int
1174 :param end: (Optional) The last byte in a range to be downloaded.
1175
1176 :type raw_download: bool
1177 :param raw_download:
1178 (Optional) If true, download the object without any expansion.
1179
1180 :type if_etag_match: Union[str, Set[str]]
1181 :param if_etag_match:
1182 (Optional) See :ref:`using-if-etag-match`
1183
1184 :type if_etag_not_match: Union[str, Set[str]]
1185 :param if_etag_not_match:
1186 (Optional) See :ref:`using-if-etag-not-match`
1187
1188 :type if_generation_match: long
1189 :param if_generation_match:
1190 (Optional) See :ref:`using-if-generation-match`
1191
1192 :type if_generation_not_match: long
1193 :param if_generation_not_match:
1194 (Optional) See :ref:`using-if-generation-not-match`
1195
1196 :type if_metageneration_match: long
1197 :param if_metageneration_match:
1198 (Optional) See :ref:`using-if-metageneration-match`
1199
1200 :type if_metageneration_not_match: long
1201 :param if_metageneration_not_match:
1202 (Optional) See :ref:`using-if-metageneration-not-match`
1203
1204 :type timeout: float or tuple
1205 :param timeout:
1206 (Optional) The amount of time, in seconds, to wait
1207 for the server response. See: :ref:`configuring_timeouts`
1208
1209 :type checksum: str
1210 :param checksum:
1211 (Optional) The type of checksum to compute to verify the integrity
1212 of the object. The response headers must contain a checksum of the
1213 requested type. If the headers lack an appropriate checksum (for
1214 instance in the case of transcoded or ranged downloads where the
1215 remote service does not know the correct checksum, including
1216 downloads where chunk_size is set) an INFO-level log will be
1217 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1218 default is "auto", which will try to detect if the C extension for
1219 crc32c is installed and fall back to md5 otherwise.
1220
1221 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1222 :param retry: (Optional) How to retry the RPC. A None value will disable
1223 retries. A google.api_core.retry.Retry value will enable retries,
1224 and the object will define retriable response codes and errors and
1225 configure backoff and timeout options.
1226
1227 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1228 Retry object and activates it only if certain conditions are met.
1229 This class exists to provide safe defaults for RPC calls that are
1230 not technically safe to retry normally (due to potential data
1231 duplication or other side-effects) but become safe to retry if a
1232 condition such as if_metageneration_match is set.
1233
1234 See the retry.py source code and docstrings in this package
1235 (google.cloud.storage.retry) for information on retry types and how
1236 to configure them.
1237
1238 :type single_shot_download: bool
1239 :param single_shot_download:
1240 (Optional) If true, download the object in a single request.
1241 Caution: Enabling this will increase the memory overload for your application.
1242 Please enable this as per your use case.
1243
1244 :raises: :class:`google.cloud.exceptions.NotFound`
1245 """
1246 with create_trace_span(name="Storage.Blob.downloadToFile"):
1247 self._prep_and_do_download(
1248 file_obj,
1249 client=client,
1250 start=start,
1251 end=end,
1252 raw_download=raw_download,
1253 if_etag_match=if_etag_match,
1254 if_etag_not_match=if_etag_not_match,
1255 if_generation_match=if_generation_match,
1256 if_generation_not_match=if_generation_not_match,
1257 if_metageneration_match=if_metageneration_match,
1258 if_metageneration_not_match=if_metageneration_not_match,
1259 timeout=timeout,
1260 checksum=checksum,
1261 retry=retry,
1262 single_shot_download=single_shot_download,
1263 )
1264
1265 def _handle_filename_and_download(self, filename, *args, **kwargs):
1266 """Download the contents of this blob into a named file.
1267
1268 :type filename: str
1269 :param filename: A filename to be passed to ``open``.
1270
1271 For *args and **kwargs, refer to the documentation for download_to_filename() for more information.
1272 """
1273
1274 try:
1275 with open(filename, "wb") as file_obj:
1276 self._prep_and_do_download(
1277 file_obj,
1278 *args,
1279 **kwargs,
1280 )
1281
1282 except (DataCorruption, NotFound):
1283 # Delete the corrupt or empty downloaded file.
1284 os.remove(filename)
1285 raise
1286
1287 updated = self.updated
1288 if updated is not None:
1289 mtime = updated.timestamp()
1290 os.utime(file_obj.name, (mtime, mtime))
1291
1292 def download_to_filename(
1293 self,
1294 filename,
1295 client=None,
1296 start=None,
1297 end=None,
1298 raw_download=False,
1299 if_etag_match=None,
1300 if_etag_not_match=None,
1301 if_generation_match=None,
1302 if_generation_not_match=None,
1303 if_metageneration_match=None,
1304 if_metageneration_not_match=None,
1305 timeout=_DEFAULT_TIMEOUT,
1306 checksum="auto",
1307 retry=DEFAULT_RETRY,
1308 single_shot_download=False,
1309 ):
1310 """Download the contents of this blob into a named file.
1311
1312 If :attr:`user_project` is set on the bucket, bills the API request
1313 to that project.
1314
1315 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-download-encrypted-file#storage_download_encrypted_file-python)
1316 to download a file with a [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).
1317
1318 :type filename: str
1319 :param filename: A filename to be passed to ``open``.
1320
1321 :type client: :class:`~google.cloud.storage.client.Client`
1322 :param client:
1323 (Optional) The client to use. If not passed, falls back to the
1324 ``client`` stored on the blob's bucket.
1325
1326 :type start: int
1327 :param start: (Optional) The first byte in a range to be downloaded.
1328
1329 :type end: int
1330 :param end: (Optional) The last byte in a range to be downloaded.
1331
1332 :type raw_download: bool
1333 :param raw_download:
1334 (Optional) If true, download the object without any expansion.
1335
1336 :type if_etag_match: Union[str, Set[str]]
1337 :param if_etag_match:
1338 (Optional) See :ref:`using-if-etag-match`
1339
1340 :type if_etag_not_match: Union[str, Set[str]]
1341 :param if_etag_not_match:
1342 (Optional) See :ref:`using-if-etag-not-match`
1343
1344 :type if_generation_match: long
1345 :param if_generation_match:
1346 (Optional) See :ref:`using-if-generation-match`
1347
1348 :type if_generation_not_match: long
1349 :param if_generation_not_match:
1350 (Optional) See :ref:`using-if-generation-not-match`
1351
1352 :type if_metageneration_match: long
1353 :param if_metageneration_match:
1354 (Optional) See :ref:`using-if-metageneration-match`
1355
1356 :type if_metageneration_not_match: long
1357 :param if_metageneration_not_match:
1358 (Optional) See :ref:`using-if-metageneration-not-match`
1359
1360 :type timeout: float or tuple
1361 :param timeout:
1362 (Optional) The amount of time, in seconds, to wait
1363 for the server response. See: :ref:`configuring_timeouts`
1364
1365 :type checksum: str
1366 :param checksum:
1367 (Optional) The type of checksum to compute to verify the integrity
1368 of the object. The response headers must contain a checksum of the
1369 requested type. If the headers lack an appropriate checksum (for
1370 instance in the case of transcoded or ranged downloads where the
1371 remote service does not know the correct checksum, including
1372 downloads where chunk_size is set) an INFO-level log will be
1373 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1374 default is "auto", which will try to detect if the C extension for
1375 crc32c is installed and fall back to md5 otherwise.
1376
1377 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1378 :param retry: (Optional) How to retry the RPC. A None value will disable
1379 retries. A google.api_core.retry.Retry value will enable retries,
1380 and the object will define retriable response codes and errors and
1381 configure backoff and timeout options.
1382
1383 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1384 Retry object and activates it only if certain conditions are met.
1385 This class exists to provide safe defaults for RPC calls that are
1386 not technically safe to retry normally (due to potential data
1387 duplication or other side-effects) but become safe to retry if a
1388 condition such as if_metageneration_match is set.
1389
1390 See the retry.py source code and docstrings in this package
1391 (google.cloud.storage.retry) for information on retry types and how
1392 to configure them.
1393
1394 :type single_shot_download: bool
1395 :param single_shot_download:
1396 (Optional) If true, download the object in a single request.
1397 Caution: Enabling this will increase the memory overload for your application.
1398 Please enable this as per your use case.
1399
1400 :raises: :class:`google.cloud.exceptions.NotFound`
1401 """
1402 with create_trace_span(name="Storage.Blob.downloadToFilename"):
1403 self._handle_filename_and_download(
1404 filename,
1405 client=client,
1406 start=start,
1407 end=end,
1408 raw_download=raw_download,
1409 if_etag_match=if_etag_match,
1410 if_etag_not_match=if_etag_not_match,
1411 if_generation_match=if_generation_match,
1412 if_generation_not_match=if_generation_not_match,
1413 if_metageneration_match=if_metageneration_match,
1414 if_metageneration_not_match=if_metageneration_not_match,
1415 timeout=timeout,
1416 checksum=checksum,
1417 retry=retry,
1418 single_shot_download=single_shot_download,
1419 )
1420
1421 def download_as_bytes(
1422 self,
1423 client=None,
1424 start=None,
1425 end=None,
1426 raw_download=False,
1427 if_etag_match=None,
1428 if_etag_not_match=None,
1429 if_generation_match=None,
1430 if_generation_not_match=None,
1431 if_metageneration_match=None,
1432 if_metageneration_not_match=None,
1433 timeout=_DEFAULT_TIMEOUT,
1434 checksum="auto",
1435 retry=DEFAULT_RETRY,
1436 single_shot_download=False,
1437 ):
1438 """Download the contents of this blob as a bytes object.
1439
1440 If :attr:`user_project` is set on the bucket, bills the API request
1441 to that project.
1442
1443 :type client: :class:`~google.cloud.storage.client.Client`
1444 :param client:
1445 (Optional) The client to use. If not passed, falls back to the
1446 ``client`` stored on the blob's bucket.
1447
1448 :type start: int
1449 :param start: (Optional) The first byte in a range to be downloaded.
1450
1451 :type end: int
1452 :param end: (Optional) The last byte in a range to be downloaded.
1453
1454 :type raw_download: bool
1455 :param raw_download:
1456 (Optional) If true, download the object without any expansion.
1457
1458 :type if_etag_match: Union[str, Set[str]]
1459 :param if_etag_match:
1460 (Optional) See :ref:`using-if-etag-match`
1461
1462 :type if_etag_not_match: Union[str, Set[str]]
1463 :param if_etag_not_match:
1464 (Optional) See :ref:`using-if-etag-not-match`
1465
1466 :type if_generation_match: long
1467 :param if_generation_match:
1468 (Optional) See :ref:`using-if-generation-match`
1469
1470 :type if_generation_not_match: long
1471 :param if_generation_not_match:
1472 (Optional) See :ref:`using-if-generation-not-match`
1473
1474 :type if_metageneration_match: long
1475 :param if_metageneration_match:
1476 (Optional) See :ref:`using-if-metageneration-match`
1477
1478 :type if_metageneration_not_match: long
1479 :param if_metageneration_not_match:
1480 (Optional) See :ref:`using-if-metageneration-not-match`
1481
1482 :type timeout: float or tuple
1483 :param timeout:
1484 (Optional) The amount of time, in seconds, to wait
1485 for the server response. See: :ref:`configuring_timeouts`
1486
1487 :type checksum: str
1488 :param checksum:
1489 (Optional) The type of checksum to compute to verify the integrity
1490 of the object. The response headers must contain a checksum of the
1491 requested type. If the headers lack an appropriate checksum (for
1492 instance in the case of transcoded or ranged downloads where the
1493 remote service does not know the correct checksum, including
1494 downloads where chunk_size is set) an INFO-level log will be
1495 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1496 default is "auto", which will try to detect if the C extension for
1497 crc32c is installed and fall back to md5 otherwise.
1498
1499 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1500 :param retry: (Optional) How to retry the RPC. A None value will disable
1501 retries. A google.api_core.retry.Retry value will enable retries,
1502 and the object will define retriable response codes and errors and
1503 configure backoff and timeout options.
1504
1505 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1506 Retry object and activates it only if certain conditions are met.
1507 This class exists to provide safe defaults for RPC calls that are
1508 not technically safe to retry normally (due to potential data
1509 duplication or other side-effects) but become safe to retry if a
1510 condition such as if_metageneration_match is set.
1511
1512 See the retry.py source code and docstrings in this package
1513 (google.cloud.storage.retry) for information on retry types and how
1514 to configure them.
1515
1516 :type single_shot_download: bool
1517 :param single_shot_download:
1518 (Optional) If true, download the object in a single request.
1519 Caution: Enabling this will increase the memory overload for your application.
1520 Please enable this as per your use case.
1521
1522 :rtype: bytes
1523 :returns: The data stored in this blob.
1524
1525 :raises: :class:`google.cloud.exceptions.NotFound`
1526 """
1527 with create_trace_span(name="Storage.Blob.downloadAsBytes"):
1528 string_buffer = BytesIO()
1529
1530 self._prep_and_do_download(
1531 string_buffer,
1532 client=client,
1533 start=start,
1534 end=end,
1535 raw_download=raw_download,
1536 if_etag_match=if_etag_match,
1537 if_etag_not_match=if_etag_not_match,
1538 if_generation_match=if_generation_match,
1539 if_generation_not_match=if_generation_not_match,
1540 if_metageneration_match=if_metageneration_match,
1541 if_metageneration_not_match=if_metageneration_not_match,
1542 timeout=timeout,
1543 checksum=checksum,
1544 retry=retry,
1545 single_shot_download=single_shot_download,
1546 )
1547 return string_buffer.getvalue()
1548
1549 def download_as_string(
1550 self,
1551 client=None,
1552 start=None,
1553 end=None,
1554 raw_download=False,
1555 if_etag_match=None,
1556 if_etag_not_match=None,
1557 if_generation_match=None,
1558 if_generation_not_match=None,
1559 if_metageneration_match=None,
1560 if_metageneration_not_match=None,
1561 timeout=_DEFAULT_TIMEOUT,
1562 retry=DEFAULT_RETRY,
1563 single_shot_download=False,
1564 ):
1565 """(Deprecated) Download the contents of this blob as a bytes object.
1566
1567 If :attr:`user_project` is set on the bucket, bills the API request
1568 to that project.
1569
1570 .. note::
1571 Deprecated alias for :meth:`download_as_bytes`.
1572
1573 :type client: :class:`~google.cloud.storage.client.Client`
1574 :param client:
1575 (Optional) The client to use. If not passed, falls back to the
1576 ``client`` stored on the blob's bucket.
1577
1578 :type start: int
1579 :param start: (Optional) The first byte in a range to be downloaded.
1580
1581 :type end: int
1582 :param end: (Optional) The last byte in a range to be downloaded.
1583
1584 :type raw_download: bool
1585 :param raw_download:
1586 (Optional) If true, download the object without any expansion.
1587
1588 :type if_etag_match: Union[str, Set[str]]
1589 :param if_etag_match:
1590 (Optional) See :ref:`using-if-etag-match`
1591
1592 :type if_etag_not_match: Union[str, Set[str]]
1593 :param if_etag_not_match:
1594 (Optional) See :ref:`using-if-etag-not-match`
1595
1596 :type if_generation_match: long
1597 :param if_generation_match:
1598 (Optional) See :ref:`using-if-generation-match`
1599
1600 :type if_generation_not_match: long
1601 :param if_generation_not_match:
1602 (Optional) See :ref:`using-if-generation-not-match`
1603
1604 :type if_metageneration_match: long
1605 :param if_metageneration_match:
1606 (Optional) See :ref:`using-if-metageneration-match`
1607
1608 :type if_metageneration_not_match: long
1609 :param if_metageneration_not_match:
1610 (Optional) See :ref:`using-if-metageneration-not-match`
1611
1612 :type timeout: float or tuple
1613 :param timeout:
1614 (Optional) The amount of time, in seconds, to wait
1615 for the server response. See: :ref:`configuring_timeouts`
1616
1617 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1618 :param retry: (Optional) How to retry the RPC. A None value will disable
1619 retries. A google.api_core.retry.Retry value will enable retries,
1620 and the object will define retriable response codes and errors and
1621 configure backoff and timeout options.
1622
1623 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1624 Retry object and activates it only if certain conditions are met.
1625 This class exists to provide safe defaults for RPC calls that are
1626 not technically safe to retry normally (due to potential data
1627 duplication or other side-effects) but become safe to retry if a
1628 condition such as if_metageneration_match is set.
1629
1630 See the retry.py source code and docstrings in this package
1631 (google.cloud.storage.retry) for information on retry types and how
1632 to configure them.
1633
1634 :type single_shot_download: bool
1635 :param single_shot_download:
1636 (Optional) If true, download the object in a single request.
1637 Caution: Enabling this will increase the memory overload for your application.
1638 Please enable this as per your use case.
1639
1640 :rtype: bytes
1641 :returns: The data stored in this blob.
1642
1643 :raises: :class:`google.cloud.exceptions.NotFound`
1644 """
1645 warnings.warn(
1646 _DOWNLOAD_AS_STRING_DEPRECATED,
1647 PendingDeprecationWarning,
1648 stacklevel=2,
1649 )
1650 with create_trace_span(name="Storage.Blob.downloadAsString"):
1651 return self.download_as_bytes(
1652 client=client,
1653 start=start,
1654 end=end,
1655 raw_download=raw_download,
1656 if_etag_match=if_etag_match,
1657 if_etag_not_match=if_etag_not_match,
1658 if_generation_match=if_generation_match,
1659 if_generation_not_match=if_generation_not_match,
1660 if_metageneration_match=if_metageneration_match,
1661 if_metageneration_not_match=if_metageneration_not_match,
1662 timeout=timeout,
1663 retry=retry,
1664 single_shot_download=single_shot_download,
1665 )
1666
1667 def download_as_text(
1668 self,
1669 client=None,
1670 start=None,
1671 end=None,
1672 raw_download=False,
1673 encoding=None,
1674 if_etag_match=None,
1675 if_etag_not_match=None,
1676 if_generation_match=None,
1677 if_generation_not_match=None,
1678 if_metageneration_match=None,
1679 if_metageneration_not_match=None,
1680 timeout=_DEFAULT_TIMEOUT,
1681 retry=DEFAULT_RETRY,
1682 single_shot_download=False,
1683 ):
1684 """Download the contents of this blob as text (*not* bytes).
1685
1686 If :attr:`user_project` is set on the bucket, bills the API request
1687 to that project.
1688
1689 :type client: :class:`~google.cloud.storage.client.Client`
1690 :param client:
1691 (Optional) The client to use. If not passed, falls back to the
1692 ``client`` stored on the blob's bucket.
1693
1694 :type start: int
1695 :param start: (Optional) The first byte in a range to be downloaded.
1696
1697 :type end: int
1698 :param end: (Optional) The last byte in a range to be downloaded.
1699
1700 :type raw_download: bool
1701 :param raw_download:
1702 (Optional) If true, download the object without any expansion.
1703
1704 :type encoding: str
1705 :param encoding: (Optional) encoding to be used to decode the
1706 downloaded bytes. Defaults to the ``charset`` param of
1707 attr:`content_type`, or else to "utf-8".
1708
1709 :type if_etag_match: Union[str, Set[str]]
1710 :param if_etag_match:
1711 (Optional) See :ref:`using-if-etag-match`
1712
1713 :type if_etag_not_match: Union[str, Set[str]]
1714 :param if_etag_not_match:
1715 (Optional) See :ref:`using-if-etag-not-match`
1716
1717 :type if_generation_match: long
1718 :param if_generation_match:
1719 (Optional) See :ref:`using-if-generation-match`
1720
1721 :type if_generation_not_match: long
1722 :param if_generation_not_match:
1723 (Optional) See :ref:`using-if-generation-not-match`
1724
1725 :type if_metageneration_match: long
1726 :param if_metageneration_match:
1727 (Optional) See :ref:`using-if-metageneration-match`
1728
1729 :type if_metageneration_not_match: long
1730 :param if_metageneration_not_match:
1731 (Optional) See :ref:`using-if-metageneration-not-match`
1732
1733 :type timeout: float or tuple
1734 :param timeout:
1735 (Optional) The amount of time, in seconds, to wait
1736 for the server response. See: :ref:`configuring_timeouts`
1737
1738 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1739 :param retry: (Optional) How to retry the RPC. A None value will disable
1740 retries. A google.api_core.retry.Retry value will enable retries,
1741 and the object will define retriable response codes and errors and
1742 configure backoff and timeout options.
1743
1744 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1745 Retry object and activates it only if certain conditions are met.
1746 This class exists to provide safe defaults for RPC calls that are
1747 not technically safe to retry normally (due to potential data
1748 duplication or other side-effects) but become safe to retry if a
1749 condition such as if_metageneration_match is set.
1750
1751 See the retry.py source code and docstrings in this package
1752 (google.cloud.storage.retry) for information on retry types and how
1753 to configure them.
1754
1755 :type single_shot_download: bool
1756 :param single_shot_download:
1757 (Optional) If true, download the object in a single request.
1758 Caution: Enabling this will increase the memory overload for your application.
1759 Please enable this as per your use case.
1760
1761 :rtype: text
1762 :returns: The data stored in this blob, decoded to text.
1763 """
1764 with create_trace_span(name="Storage.Blob.downloadAsText"):
1765 data = self.download_as_bytes(
1766 client=client,
1767 start=start,
1768 end=end,
1769 raw_download=raw_download,
1770 if_etag_match=if_etag_match,
1771 if_etag_not_match=if_etag_not_match,
1772 if_generation_match=if_generation_match,
1773 if_generation_not_match=if_generation_not_match,
1774 if_metageneration_match=if_metageneration_match,
1775 if_metageneration_not_match=if_metageneration_not_match,
1776 timeout=timeout,
1777 retry=retry,
1778 single_shot_download=single_shot_download,
1779 )
1780
1781 if encoding is not None:
1782 return data.decode(encoding)
1783
1784 if self.content_type is not None:
1785 msg = HeaderParser().parsestr("Content-Type: " + self.content_type)
1786 params = dict(msg.get_params()[1:])
1787 if "charset" in params:
1788 return data.decode(params["charset"])
1789
1790 return data.decode("utf-8")
1791
1792 def _get_content_type(self, content_type, filename=None):
1793 """Determine the content type from the current object.
1794
1795 The return value will be determined in order of precedence:
1796
1797 - The value passed in to this method (if not :data:`None`)
1798 - The value stored on the current blob
1799 - The default value ('application/octet-stream')
1800
1801 :type content_type: str
1802 :param content_type: (Optional) Type of content.
1803
1804 :type filename: str
1805 :param filename:
1806 (Optional) The name of the file where the content is stored.
1807
1808 :rtype: str
1809 :returns: Type of content gathered from the object.
1810 """
1811 if content_type is None:
1812 content_type = self.content_type
1813
1814 if content_type is None and filename is not None:
1815 content_type, _ = mimetypes.guess_type(filename)
1816
1817 if content_type is None:
1818 content_type = _DEFAULT_CONTENT_TYPE
1819
1820 return content_type
1821
1822 def _get_writable_metadata(self):
1823 """Get the object / blob metadata which is writable.
1824
1825 This is intended to be used when creating a new object / blob.
1826
1827 See the [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects)
1828 for more information, the fields marked as writable are:
1829
1830 * ``acl``
1831 * ``cacheControl``
1832 * ``contentDisposition``
1833 * ``contentEncoding``
1834 * ``contentLanguage``
1835 * ``contentType``
1836 * ``crc32c``
1837 * ``customTime``
1838 * ``md5Hash``
1839 * ``metadata``
1840 * ``name``
1841 * ``retention``
1842 * ``storageClass``
1843
1844 For now, we don't support ``acl``, access control lists should be
1845 managed directly through :class:`ObjectACL` methods.
1846 """
1847 # NOTE: This assumes `self.name` is unicode.
1848 object_metadata = {"name": self.name}
1849 for key in self._changes:
1850 if key in _WRITABLE_FIELDS:
1851 object_metadata[key] = self._properties[key]
1852
1853 return object_metadata
1854
1855 def _get_upload_arguments(self, client, content_type, filename=None, command=None):
1856 """Get required arguments for performing an upload.
1857
1858 The content type returned will be determined in order of precedence:
1859
1860 - The value passed in to this method (if not :data:`None`)
1861 - The value stored on the current blob
1862 - The default value ('application/octet-stream')
1863
1864 :type content_type: str
1865 :param content_type: Type of content being uploaded (or :data:`None`).
1866
1867 :type command: str
1868 :param command:
1869 (Optional) Information about which interface for upload was used,
1870 to be included in the X-Goog-API-Client header. Please leave as None
1871 unless otherwise directed.
1872
1873 :rtype: tuple
1874 :returns: A triple of
1875
1876 * A header dictionary
1877 * An object metadata dictionary
1878 * The ``content_type`` as a string (according to precedence)
1879 """
1880 content_type = self._get_content_type(content_type, filename=filename)
1881 # Add any client attached custom headers to the upload headers.
1882 headers = {
1883 **_get_default_headers(
1884 client._connection.user_agent, content_type, command=command
1885 ),
1886 **_get_encryption_headers(self._encryption_key),
1887 **client._extra_headers,
1888 }
1889 object_metadata = self._get_writable_metadata()
1890 return headers, object_metadata, content_type
1891
1892 def _do_multipart_upload(
1893 self,
1894 client,
1895 stream,
1896 content_type,
1897 size,
1898 predefined_acl,
1899 if_generation_match,
1900 if_generation_not_match,
1901 if_metageneration_match,
1902 if_metageneration_not_match,
1903 timeout=_DEFAULT_TIMEOUT,
1904 checksum="auto",
1905 retry=None,
1906 command=None,
1907 ):
1908 """Perform a multipart upload.
1909
1910 The content type of the upload will be determined in order
1911 of precedence:
1912
1913 - The value passed in to this method (if not :data:`None`)
1914 - The value stored on the current blob
1915 - The default value ('application/octet-stream')
1916
1917 :type client: :class:`~google.cloud.storage.client.Client`
1918 :param client:
1919 (Optional) The client to use. If not passed, falls back to the
1920 ``client`` stored on the blob's bucket.
1921
1922 :type stream: IO[bytes]
1923 :param stream: A bytes IO object open for reading.
1924
1925 :type content_type: str
1926 :param content_type: Type of content being uploaded (or :data:`None`).
1927
1928 :type size: int
1929 :param size:
1930 The number of bytes to be uploaded (which will be read from
1931 ``stream``). If not provided, the upload will be concluded once
1932 ``stream`` is exhausted (or :data:`None`).
1933
1934 :type predefined_acl: str
1935 :param predefined_acl: (Optional) Predefined access control list
1936
1937 :type if_generation_match: long
1938 :param if_generation_match:
1939 (Optional) See :ref:`using-if-generation-match`
1940
1941 :type if_generation_not_match: long
1942 :param if_generation_not_match:
1943 (Optional) See :ref:`using-if-generation-not-match`
1944
1945 :type if_metageneration_match: long
1946 :param if_metageneration_match:
1947 (Optional) See :ref:`using-if-metageneration-match`
1948
1949 :type if_metageneration_not_match: long
1950 :param if_metageneration_not_match:
1951 (Optional) See :ref:`using-if-metageneration-not-match`
1952
1953 :type timeout: float or tuple
1954 :param timeout:
1955 (Optional) The amount of time, in seconds, to wait
1956 for the server response. See: :ref:`configuring_timeouts`
1957
1958 :type checksum: str
1959 :param checksum:
1960 (Optional) The type of checksum to compute to verify
1961 the integrity of the object. The request metadata will be amended
1962 to include the computed value. Using this option will override a
1963 manually-set checksum value. Supported values are "md5", "crc32c",
1964 "auto" and None. The default is "auto", which will try to detect if
1965 the C extension for crc32c is installed and fall back to md5
1966 otherwise.
1967 :type retry: google.api_core.retry.Retry
1968 :param retry: (Optional) How to retry the RPC. A None value will disable
1969 retries. A google.api_core.retry.Retry value will enable retries,
1970 and the object will configure backoff and timeout options.
1971
1972 This private method does not accept ConditionalRetryPolicy values
1973 because the information necessary to evaluate the policy is instead
1974 evaluated in blob._do_upload().
1975
1976 See the retry.py source code and docstrings in this package
1977 (google.cloud.storage.retry) for information on retry types and how
1978 to configure them.
1979
1980 :type command: str
1981 :param command:
1982 (Optional) Information about which interface for upload was used,
1983 to be included in the X-Goog-API-Client header. Please leave as None
1984 unless otherwise directed.
1985
1986 :rtype: :class:`~requests.Response`
1987 :returns: The "200 OK" response object returned after the multipart
1988 upload request.
1989 :raises: :exc:`ValueError` if ``size`` is not :data:`None` but the
1990 ``stream`` has fewer than ``size`` bytes remaining.
1991 """
1992 if size is None:
1993 data = stream.read()
1994 else:
1995 data = stream.read(size)
1996 if len(data) < size:
1997 msg = _READ_LESS_THAN_SIZE.format(size, len(data))
1998 raise ValueError(msg)
1999
2000 client = self._require_client(client)
2001 transport = self._get_transport(client)
2002 if "metadata" in self._properties and "metadata" not in self._changes:
2003 self._changes.add("metadata")
2004
2005 info = self._get_upload_arguments(client, content_type, command=command)
2006 headers, object_metadata, content_type = info
2007
2008 hostname = _get_host_name(client._connection)
2009 base_url = _MULTIPART_URL_TEMPLATE.format(
2010 hostname=hostname,
2011 bucket_path=self.bucket.path,
2012 api_version=_API_VERSION,
2013 )
2014 name_value_pairs = []
2015
2016 if self.user_project is not None:
2017 name_value_pairs.append(("userProject", self.user_project))
2018
2019 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
2020 # at rest, object resource metadata will store the version of the Key Management
2021 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
2022 # used to upload a new version of the object then the existing kmsKeyName version
2023 # value can't be used in the upload request and the client instead ignores it.
2024 if (
2025 self.kms_key_name is not None
2026 and "cryptoKeyVersions" not in self.kms_key_name
2027 ):
2028 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
2029
2030 if predefined_acl is not None:
2031 name_value_pairs.append(("predefinedAcl", predefined_acl))
2032
2033 if if_generation_match is not None:
2034 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
2035
2036 if if_generation_not_match is not None:
2037 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
2038
2039 if if_metageneration_match is not None:
2040 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
2041
2042 if if_metageneration_not_match is not None:
2043 name_value_pairs.append(
2044 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
2045 )
2046
2047 upload_url = _add_query_parameters(base_url, name_value_pairs)
2048 upload = MultipartUpload(
2049 upload_url, headers=headers, checksum=checksum, retry=retry
2050 )
2051
2052 extra_attributes = _get_opentelemetry_attributes_from_url(upload_url)
2053 extra_attributes["upload.checksum"] = f"{checksum}"
2054 args = {"timeout": timeout}
2055 with create_trace_span(
2056 name="Storage.MultipartUpload/transmit",
2057 attributes=extra_attributes,
2058 client=client,
2059 api_request=args,
2060 ):
2061 response = upload.transmit(
2062 transport, data, object_metadata, content_type, timeout=timeout
2063 )
2064
2065 return response
2066
2067 def _initiate_resumable_upload(
2068 self,
2069 client,
2070 stream,
2071 content_type,
2072 size,
2073 predefined_acl=None,
2074 extra_headers=None,
2075 chunk_size=None,
2076 if_generation_match=None,
2077 if_generation_not_match=None,
2078 if_metageneration_match=None,
2079 if_metageneration_not_match=None,
2080 timeout=_DEFAULT_TIMEOUT,
2081 checksum="auto",
2082 retry=None,
2083 command=None,
2084 crc32c_checksum_value=None,
2085 ):
2086 """Initiate a resumable upload.
2087
2088 The content type of the upload will be determined in order
2089 of precedence:
2090
2091 - The value passed in to this method (if not :data:`None`)
2092 - The value stored on the current blob
2093 - The default value ('application/octet-stream')
2094
2095 :type client: :class:`~google.cloud.storage.client.Client`
2096 :param client:
2097 (Optional) The client to use. If not passed, falls back to the
2098 ``client`` stored on the blob's bucket.
2099
2100 :type stream: IO[bytes]
2101 :param stream: A bytes IO object open for reading.
2102
2103 :type content_type: str
2104 :param content_type: Type of content being uploaded (or :data:`None`).
2105
2106 :type size: int
2107 :param size:
2108 The number of bytes to be uploaded (which will be read from
2109 ``stream``). If not provided, the upload will be concluded once
2110 ``stream`` is exhausted (or :data:`None`).
2111
2112 :type predefined_acl: str
2113 :param predefined_acl: (Optional) Predefined access control list
2114
2115 :type extra_headers: dict
2116 :param extra_headers:
2117 (Optional) Extra headers to add to standard headers.
2118
2119 :type chunk_size: int
2120 :param chunk_size:
2121 (Optional) Chunk size to use when creating a
2122 :class:`~google.cloud.storage._media.requests.ResumableUpload`.
2123 If not passed, will fall back to the chunk size on the
2124 current blob, if the chunk size of a current blob is also
2125 `None`, will set the default value.
2126 The default value of ``chunk_size`` is 100 MB.
2127
2128 :type if_generation_match: long
2129 :param if_generation_match:
2130 (Optional) See :ref:`using-if-generation-match`
2131
2132 :type if_generation_not_match: long
2133 :param if_generation_not_match:
2134 (Optional) See :ref:`using-if-generation-not-match`
2135
2136 :type if_metageneration_match: long
2137 :param if_metageneration_match:
2138 (Optional) See :ref:`using-if-metageneration-match`
2139
2140 :type if_metageneration_not_match: long
2141 :param if_metageneration_not_match:
2142 (Optional) See :ref:`using-if-metageneration-not-match`
2143
2144 :type timeout: float or tuple
2145 :param timeout:
2146 (Optional) The amount of time, in seconds, to wait
2147 for the server response. See: :ref:`configuring_timeouts`
2148
2149 :type checksum: str
2150 :param checksum:
2151 (Optional) The type of checksum to compute to verify
2152 the integrity of the object. After the upload is complete, the
2153 server-computed checksum of the resulting object will be checked
2154 and google.cloud.storage.exceptions.DataCorruption will be raised on
2155 a mismatch. On a validation failure, the client will attempt to
2156 delete the uploaded object automatically. Supported values are
2157 "md5", "crc32c", "auto" and None. The default is "auto", which will
2158 try to detect if the C extension for crc32c is installed and fall
2159 back to md5 otherwise.
2160
2161 :type retry: google.api_core.retry.Retry
2162 :param retry: (Optional) How to retry the RPC. A None value will disable
2163 retries. A google.api_core.retry.Retry value will enable retries,
2164 and the object will configure backoff and timeout options.
2165
2166 This private method does not accept ConditionalRetryPolicy values
2167 because the information necessary to evaluate the policy is instead
2168 evaluated in blob._do_upload().
2169
2170 See the retry.py source code and docstrings in this package
2171 (google.cloud.storage.retry) for information on retry types and how
2172 to configure them.
2173
2174 :type command: str
2175 :param command:
2176 (Optional) Information about which interface for upload was used,
2177 to be included in the X-Goog-API-Client header. Please leave as None
2178 unless otherwise directed.
2179
2180 :type crc32c_checksum_value: str
2181 :param crc32c_checksum_value: (Optional) This should be the checksum of
2182 the entire contents of `file`. Applicable while uploading object
2183 greater than `_MAX_MULTIPART_SIZE` bytes.
2184
2185 It can be obtained by running
2186
2187 `gcloud storage hash /path/to/your/file`
2188
2189 or
2190
2191 .. code-block:: python
2192
2193 import google_crc32c
2194 import base64
2195
2196 data = b"Hello, world!"
2197 crc32c_int = google_crc32c.value(data)
2198 crc32c_hex = f"{crc32c_int:08x}"
2199 crc32c_bytes = crc32c_int.to_bytes(4, "big")
2200 base64_encoded = base64.b64encode(crc32c_bytes)
2201 crc32c_base64 = base64_encoded.decode("utf-8")
2202
2203 print(crc32c_base64)
2204
2205 Above code block prints 8 char string of base64 encoded big-endian
2206 bytes of 32 bit CRC32c integer.
2207
2208 More details on CRC32c can be found in Appendix B:
2209 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
2210 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
2211
2212 :rtype: tuple
2213 :returns:
2214 Pair of
2215
2216 * The :class:`~google.cloud.storage._media.requests.ResumableUpload`
2217 that was created
2218 * The ``transport`` used to initiate the upload.
2219 """
2220 client = self._require_client(client)
2221 if chunk_size is None:
2222 chunk_size = self.chunk_size
2223 if chunk_size is None:
2224 chunk_size = _DEFAULT_CHUNKSIZE
2225
2226 transport = self._get_transport(client)
2227 if "metadata" in self._properties and "metadata" not in self._changes:
2228 self._changes.add("metadata")
2229 info = self._get_upload_arguments(client, content_type, command=command)
2230 headers, object_metadata, content_type = info
2231 if extra_headers is not None:
2232 headers.update(extra_headers)
2233
2234 if crc32c_checksum_value is not None:
2235 object_metadata["crc32c"] = crc32c_checksum_value
2236
2237 hostname = _get_host_name(client._connection)
2238 base_url = _RESUMABLE_URL_TEMPLATE.format(
2239 hostname=hostname,
2240 bucket_path=self.bucket.path,
2241 api_version=_API_VERSION,
2242 )
2243 name_value_pairs = []
2244
2245 if self.user_project is not None:
2246 name_value_pairs.append(("userProject", self.user_project))
2247
2248 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
2249 # at rest, object resource metadata will store the version of the Key Management
2250 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
2251 # used to upload a new version of the object then the existing kmsKeyName version
2252 # value can't be used in the upload request and the client instead ignores it.
2253 if (
2254 self.kms_key_name is not None
2255 and "cryptoKeyVersions" not in self.kms_key_name
2256 ):
2257 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
2258
2259 if predefined_acl is not None:
2260 name_value_pairs.append(("predefinedAcl", predefined_acl))
2261
2262 if if_generation_match is not None:
2263 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
2264
2265 if if_generation_not_match is not None:
2266 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
2267
2268 if if_metageneration_match is not None:
2269 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
2270
2271 if if_metageneration_not_match is not None:
2272 name_value_pairs.append(
2273 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
2274 )
2275
2276 upload_url = _add_query_parameters(base_url, name_value_pairs)
2277 upload = ResumableUpload(
2278 upload_url,
2279 chunk_size,
2280 headers=headers,
2281 checksum=checksum,
2282 retry=retry,
2283 )
2284
2285 upload.initiate(
2286 transport,
2287 stream,
2288 object_metadata,
2289 content_type,
2290 total_bytes=size,
2291 stream_final=False,
2292 timeout=timeout,
2293 )
2294
2295 return upload, transport
2296
    def _do_resumable_upload(
        self,
        client,
        stream,
        content_type,
        size,
        predefined_acl,
        if_generation_match,
        if_generation_not_match,
        if_metageneration_match,
        if_metageneration_not_match,
        timeout=_DEFAULT_TIMEOUT,
        checksum="auto",
        retry=None,
        command=None,
        crc32c_checksum_value=None,
    ):
        """Perform a resumable upload.

        Assumes ``chunk_size`` is not :data:`None` on the current blob.
        The default value of ``chunk_size`` is 100 MB.

        The content type of the upload will be determined in order
        of precedence:

        - The value passed in to this method (if not :data:`None`)
        - The value stored on the current blob
        - The default value ('application/octet-stream')

        :type client: :class:`~google.cloud.storage.client.Client`
        :param client:
            (Optional) The client to use. If not passed, falls back to the
            ``client`` stored on the blob's bucket.

        :type stream: IO[bytes]
        :param stream: A bytes IO object open for reading.

        :type content_type: str
        :param content_type: Type of content being uploaded (or :data:`None`).

        :type size: int
        :param size:
            The number of bytes to be uploaded (which will be read from
            ``stream``). If not provided, the upload will be concluded once
            ``stream`` is exhausted (or :data:`None`).

        :type predefined_acl: str
        :param predefined_acl: (Optional) Predefined access control list

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type checksum: str
        :param checksum:
            (Optional) The type of checksum to compute to verify
            the integrity of the object. After the upload is complete, the
            server-computed checksum of the resulting object will be checked
            and google.cloud.storage.exceptions.DataCorruption will be raised on
            a mismatch. On a validation failure, the client will attempt to
            delete the uploaded object automatically. Supported values are
            "md5", "crc32c", "auto" and None. The default is "auto", which will
            try to detect if the C extension for crc32c is installed and fall
            back to md5 otherwise.

        :type retry: google.api_core.retry.Retry
        :param retry: (Optional) How to retry the RPC. A None value will disable
            retries. A google.api_core.retry.Retry value will enable retries,
            and the object will configure backoff and timeout options.

            This private method does not accept ConditionalRetryPolicy values
            because the information necessary to evaluate the policy is instead
            evaluated in blob._do_upload().

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

        :type command: str
        :param command:
            (Optional) Information about which interface for upload was used,
            to be included in the X-Goog-API-Client header. Please leave as None
            unless otherwise directed.

        :type crc32c_checksum_value: str
        :param crc32c_checksum_value: (Optional) This should be the checksum of
            the entire contents of `stream`. Applicable while uploading object
            greater than `_MAX_MULTIPART_SIZE` bytes.

            It can be obtained by running

            `gcloud storage hash /path/to/your/file`

            or

            .. code-block:: python

                import google_crc32c
                import base64

                data = b"Hello, world!"
                crc32c_int = google_crc32c.value(data)
                crc32c_hex = f"{crc32c_int:08x}"
                crc32c_bytes = crc32c_int.to_bytes(4, "big")
                base64_encoded = base64.b64encode(crc32c_bytes)
                crc32c_base64 = base64_encoded.decode("utf-8")

                print(crc32c_base64)

            Above code block prints 8 char string of base64 encoded big-endian
            bytes of 32 bit CRC32c integer.

            More details on CRC32c can be found in Appendix B:
            https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
            base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4

        :rtype: :class:`~requests.Response`
        :returns: The "200 OK" response object returned after the final chunk
                  is uploaded.
        """
        # Create the upload session first; the session URL returned by the
        # server is what the chunk requests below are sent to.
        upload, transport = self._initiate_resumable_upload(
            client,
            stream,
            content_type,
            size,
            predefined_acl=predefined_acl,
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
            timeout=timeout,
            checksum=checksum,
            retry=retry,
            command=command,
            crc32c_checksum_value=crc32c_checksum_value,
        )
        # Record the resumable session URL, chunk size, and checksum type as
        # attributes on the tracing span below.
        extra_attributes = _get_opentelemetry_attributes_from_url(upload.resumable_url)
        extra_attributes["upload.chunk_size"] = upload.chunk_size
        extra_attributes["upload.checksum"] = f"{checksum}"

        args = {"timeout": timeout}
        with create_trace_span(
            name="Storage.ResumableUpload/transmitNextChunk",
            attributes=extra_attributes,
            client=client,
            api_request=args,
        ):
            # Send chunks until the session reports completion; the last
            # chunk's response carries the finalized object resource.
            # NOTE(review): assumes the upload is never already finished
            # immediately after initiate() (it is created with
            # stream_final=False), so ``response`` is always bound before the
            # return — confirm against ResumableUpload semantics.
            while not upload.finished:
                try:
                    response = upload.transmit_next_chunk(transport, timeout=timeout)
                except DataCorruption:
                    # Attempt to delete the corrupted object, then re-raise so
                    # the caller sees the checksum failure.
                    self.delete()
                    raise
            return response
2469
2470 def _do_upload(
2471 self,
2472 client,
2473 stream,
2474 content_type,
2475 size,
2476 predefined_acl,
2477 if_generation_match,
2478 if_generation_not_match,
2479 if_metageneration_match,
2480 if_metageneration_not_match,
2481 timeout=_DEFAULT_TIMEOUT,
2482 checksum="auto",
2483 retry=None,
2484 command=None,
2485 crc32c_checksum_value=None,
2486 ):
2487 """Determine an upload strategy and then perform the upload.
2488
2489 If the size of the data to be uploaded exceeds 8 MB a resumable media
2490 request will be used, otherwise the content and the metadata will be
2491 uploaded in a single multipart upload request.
2492
2493 The content type of the upload will be determined in order
2494 of precedence:
2495
2496 - The value passed in to this method (if not :data:`None`)
2497 - The value stored on the current blob
2498 - The default value ('application/octet-stream')
2499
2500 :type client: :class:`~google.cloud.storage.client.Client`
2501 :param client:
2502 (Optional) The client to use. If not passed, falls back to the
2503 ``client`` stored on the blob's bucket.
2504
2505 :type stream: IO[bytes]
2506 :param stream: A bytes IO object open for reading.
2507
2508 :type content_type: str
2509 :param content_type: Type of content being uploaded (or :data:`None`).
2510
2511 :type size: int
2512 :param size:
2513 The number of bytes to be uploaded (which will be read from
2514 ``stream``). If not provided, the upload will be concluded once
2515 ``stream`` is exhausted (or :data:`None`).
2516
2517 :type predefined_acl: str
2518 :param predefined_acl: (Optional) Predefined access control list
2519
2520 :type if_generation_match: long
2521 :param if_generation_match:
2522 (Optional) See :ref:`using-if-generation-match`
2523
2524 :type if_generation_not_match: long
2525 :param if_generation_not_match:
2526 (Optional) See :ref:`using-if-generation-not-match`
2527
2528 :type if_metageneration_match: long
2529 :param if_metageneration_match:
2530 (Optional) See :ref:`using-if-metageneration-match`
2531
2532 :type if_metageneration_not_match: long
2533 :param if_metageneration_not_match:
2534 (Optional) See :ref:`using-if-metageneration-not-match`
2535
2536 :type timeout: float or tuple
2537 :param timeout:
2538 (Optional) The amount of time, in seconds, to wait
2539 for the server response. See: :ref:`configuring_timeouts`
2540
2541 :type checksum: str
2542 :param checksum:
2543 (Optional) The type of checksum to compute to verify
2544 the integrity of the object. If the upload is completed in a single
2545 request, the checksum will be entirely precomputed and the remote
2546 server will handle verification and error handling. If the upload
2547 is too large and must be transmitted in multiple requests, the
2548 checksum will be incrementally computed and the client will handle
2549 verification and error handling, raising
2550 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2551 attempting to delete the corrupted file. Supported values are
2552 "md5", "crc32c", "auto" and None. The default is "auto", which will
2553 try to detect if the C extension for crc32c is installed and fall
2554 back to md5 otherwise.
2555
2556 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2557 :param retry: (Optional) How to retry the RPC. A None value will disable
2558 retries. A google.api_core.retry.Retry value will enable retries,
2559 and the object will define retriable response codes and errors and
2560 configure backoff and timeout options.
2561
2562 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2563 Retry object and activates it only if certain conditions are met.
2564 This class exists to provide safe defaults for RPC calls that are
2565 not technically safe to retry normally (due to potential data
2566 duplication or other side-effects) but become safe to retry if a
2567 condition such as if_generation_match is set.
2568
2569 See the retry.py source code and docstrings in this package
2570 (google.cloud.storage.retry) for information on retry types and how
2571 to configure them.
2572
2573 :type command: str
2574 :param command:
2575 (Optional) Information about which interface for upload was used,
2576 to be included in the X-Goog-API-Client header. Please leave as None
2577 unless otherwise directed.
2578
2579 :type crc32c_checksum_value: str
2580 :param crc32c_checksum_value: (Optional) This should be the checksum of
2581 the entire contents of `file_obj`. Applicable while uploading object
2582 greater than `_MAX_MULTIPART_SIZE` bytes.
2583
2584 It can be obtained by running
2585
2586 `gcloud storage hash /path/to/your/file`
2587
2588 or
2589
2590 .. code-block:: python
2591
2592 import google_crc32c
2593 import base64
2594
2595 data = b"Hello, world!"
2596 crc32c_int = google_crc32c.value(data)
2597 crc32c_hex = f"{crc32c_int:08x}"
2598 crc32c_bytes = crc32c_int.to_bytes(4, "big")
2599 base64_encoded = base64.b64encode(crc32c_bytes)
2600 crc32c_base64 = base64_encoded.decode("utf-8")
2601
2602 print(crc32c_base64)
2603
2604 Above code block prints 8 char string of base64 encoded big-endian
2605 bytes of 32 bit CRC32c integer.
2606
2607 More details on CRC32c can be found in Appendix B:
2608 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
2609 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
2610
2611 :rtype: dict
2612 :returns: The parsed JSON from the "200 OK" response. This will be the
2613 **only** response in the multipart case and it will be the
2614 **final** response in the resumable case.
2615 """
2616
2617 # Handle ConditionalRetryPolicy.
2618 if isinstance(retry, ConditionalRetryPolicy):
2619 # Conditional retries are designed for non-media calls, which change
2620 # arguments into query_params dictionaries. Media operations work
2621 # differently, so here we make a "fake" query_params to feed to the
2622 # ConditionalRetryPolicy.
2623 query_params = {
2624 "ifGenerationMatch": if_generation_match,
2625 "ifMetagenerationMatch": if_metageneration_match,
2626 }
2627 retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
2628
2629 if size is not None and size <= _MAX_MULTIPART_SIZE:
2630 response = self._do_multipart_upload(
2631 client,
2632 stream,
2633 content_type,
2634 size,
2635 predefined_acl,
2636 if_generation_match,
2637 if_generation_not_match,
2638 if_metageneration_match,
2639 if_metageneration_not_match,
2640 timeout=timeout,
2641 checksum=checksum,
2642 retry=retry,
2643 command=command,
2644 )
2645 else:
2646 response = self._do_resumable_upload(
2647 client,
2648 stream,
2649 content_type,
2650 size,
2651 predefined_acl,
2652 if_generation_match,
2653 if_generation_not_match,
2654 if_metageneration_match,
2655 if_metageneration_not_match,
2656 timeout=timeout,
2657 checksum=checksum,
2658 retry=retry,
2659 command=command,
2660 crc32c_checksum_value=crc32c_checksum_value,
2661 )
2662
2663 return response.json()
2664
2665 def _prep_and_do_upload(
2666 self,
2667 file_obj,
2668 rewind=False,
2669 size=None,
2670 content_type=None,
2671 client=None,
2672 predefined_acl=None,
2673 if_generation_match=None,
2674 if_generation_not_match=None,
2675 if_metageneration_match=None,
2676 if_metageneration_not_match=None,
2677 timeout=_DEFAULT_TIMEOUT,
2678 checksum="auto",
2679 retry=DEFAULT_RETRY,
2680 command=None,
2681 crc32c_checksum_value=None,
2682 ):
2683 """Upload the contents of this blob from a file-like object.
2684
2685 The content type of the upload will be determined in order
2686 of precedence:
2687
2688 - The value passed in to this method (if not :data:`None`)
2689 - The value stored on the current blob
2690 - The default value ('application/octet-stream')
2691
2692 .. note::
2693 The effect of uploading to an existing blob depends on the
2694 "versioning" and "lifecycle" policies defined on the blob's
2695 bucket. In the absence of those policies, upload will
2696 overwrite any existing contents.
2697
2698 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2699 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2700 API documents for details.
2701
2702 If the size of the data to be uploaded exceeds 8 MB a resumable media
2703 request will be used, otherwise the content and the metadata will be
2704 uploaded in a single multipart upload request.
2705
2706 For more fine-grained over the upload process, check out
2707 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
2708
2709 If :attr:`user_project` is set on the bucket, bills the API request
2710 to that project.
2711
2712 :type file_obj: file
2713 :param file_obj: A file handle opened in binary mode for reading.
2714
2715 :type rewind: bool
2716 :param rewind:
2717 If True, seek to the beginning of the file handle before writing
2718 the file to Cloud Storage.
2719
2720 :type size: int
2721 :param size:
2722 The number of bytes to be uploaded (which will be read from
2723 ``file_obj``). If not provided, the upload will be concluded once
2724 ``file_obj`` is exhausted.
2725
2726 :type content_type: str
2727 :param content_type: (Optional) Type of content being uploaded.
2728
2729 :type client: :class:`~google.cloud.storage.client.Client`
2730 :param client:
2731 (Optional) The client to use. If not passed, falls back to the
2732 ``client`` stored on the blob's bucket.
2733
2734 :type predefined_acl: str
2735 :param predefined_acl: (Optional) Predefined access control list
2736
2737 :type if_generation_match: long
2738 :param if_generation_match:
2739 (Optional) See :ref:`using-if-generation-match`
2740
2741 :type if_generation_not_match: long
2742 :param if_generation_not_match:
2743 (Optional) See :ref:`using-if-generation-not-match`
2744
2745 :type if_metageneration_match: long
2746 :param if_metageneration_match:
2747 (Optional) See :ref:`using-if-metageneration-match`
2748
2749 :type if_metageneration_not_match: long
2750 :param if_metageneration_not_match:
2751 (Optional) See :ref:`using-if-metageneration-not-match`
2752
2753 :type timeout: float or tuple
2754 :param timeout:
2755 (Optional) The amount of time, in seconds, to wait
2756 for the server response. See: :ref:`configuring_timeouts`
2757
2758 :type checksum: str
2759 :param checksum:
2760 (Optional) The type of checksum to compute to verify
2761 the integrity of the object. If the upload is completed in a single
2762 request, the checksum will be entirely precomputed and the remote
2763 server will handle verification and error handling. If the upload
2764 is too large and must be transmitted in multiple requests, the
2765 checksum will be incrementally computed and the client will handle
2766 verification and error handling, raising
2767 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2768 attempting to delete the corrupted file. Supported values are
2769 "md5", "crc32c", "auto" and None. The default is "auto", which will
2770 try to detect if the C extension for crc32c is installed and fall
2771 back to md5 otherwise.
2772
2773 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2774 :param retry: (Optional) How to retry the RPC. A None value will disable
2775 retries. A google.api_core.retry.Retry value will enable retries,
2776 and the object will define retriable response codes and errors and
2777 configure backoff and timeout options.
2778
2779 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2780 Retry object and activates it only if certain conditions are met.
2781 This class exists to provide safe defaults for RPC calls that are
2782 not technically safe to retry normally (due to potential data
2783 duplication or other side-effects) but become safe to retry if a
2784 condition such as if_generation_match is set.
2785
2786 See the retry.py source code and docstrings in this package
2787 (google.cloud.storage.retry) for information on retry types and how
2788 to configure them.
2789
2790 :type command: str
2791 :param command:
2792 (Optional) Information about which interface for upload was used,
2793 to be included in the X-Goog-API-Client header. Please leave as None
2794 unless otherwise directed.
2795
2796 :type crc32c_checksum_value: str
2797 :param crc32c_checksum_value: (Optional) This should be the checksum of
2798 the entire contents of `file_obj`. Applicable while uploading object
2799 greater than `_MAX_MULTIPART_SIZE` bytes.
2800
2801 It can be obtained by running
2802
2803 `gcloud storage hash /path/to/your/file`
2804
2805 or
2806
2807 .. code-block:: python
2808
2809 import google_crc32c
2810 import base64
2811
2812 data = b"Hello, world!"
2813 crc32c_int = google_crc32c.value(data)
2814 crc32c_hex = f"{crc32c_int:08x}"
2815 crc32c_bytes = crc32c_int.to_bytes(4, "big")
2816 base64_encoded = base64.b64encode(crc32c_bytes)
2817 crc32c_base64 = base64_encoded.decode("utf-8")
2818
2819 print(crc32c_base64)
2820
2821 Above code block prints 8 char string of base64 encoded big-endian
2822 bytes of 32 bit CRC32c integer.
2823
2824 More details on CRC32c can be found in Appendix B:
2825 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
2826 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
2827
2828 :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
2829 if the upload response returns an error status.
2830 """
2831 _maybe_rewind(file_obj, rewind=rewind)
2832 predefined_acl = ACL.validate_predefined(predefined_acl)
2833
2834 try:
2835 created_json = self._do_upload(
2836 client,
2837 file_obj,
2838 content_type,
2839 size,
2840 predefined_acl,
2841 if_generation_match,
2842 if_generation_not_match,
2843 if_metageneration_match,
2844 if_metageneration_not_match,
2845 timeout=timeout,
2846 checksum=checksum,
2847 retry=retry,
2848 command=command,
2849 crc32c_checksum_value=crc32c_checksum_value,
2850 )
2851 self._set_properties(created_json)
2852 except InvalidResponse as exc:
2853 _raise_from_invalid_response(exc)
2854
2855 def upload_from_file(
2856 self,
2857 file_obj,
2858 rewind=False,
2859 size=None,
2860 content_type=None,
2861 client=None,
2862 predefined_acl=None,
2863 if_generation_match=None,
2864 if_generation_not_match=None,
2865 if_metageneration_match=None,
2866 if_metageneration_not_match=None,
2867 timeout=_DEFAULT_TIMEOUT,
2868 checksum="auto",
2869 retry=DEFAULT_RETRY,
2870 crc32c_checksum_value=None,
2871 ):
2872 """Upload the contents of this blob from a file-like object.
2873
2874 The content type of the upload will be determined in order
2875 of precedence:
2876
2877 - The value passed in to this method (if not :data:`None`)
2878 - The value stored on the current blob
2879 - The default value ('application/octet-stream')
2880
2881 .. note::
2882 The effect of uploading to an existing blob depends on the
2883 "versioning" and "lifecycle" policies defined on the blob's
2884 bucket. In the absence of those policies, upload will
2885 overwrite any existing contents.
2886
2887 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2888 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2889 API documents for details.
2890
2891 If the size of the data to be uploaded exceeds 8 MB a resumable media
2892 request will be used, otherwise the content and the metadata will be
2893 uploaded in a single multipart upload request.
2894
2895 For more fine-grained over the upload process, check out
2896 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
2897
2898 If :attr:`user_project` is set on the bucket, bills the API request
2899 to that project.
2900
2901 :type file_obj: file
2902 :param file_obj: A file handle opened in binary mode for reading.
2903
2904 :type rewind: bool
2905 :param rewind:
2906 If True, seek to the beginning of the file handle before writing
2907 the file to Cloud Storage.
2908
2909 :type size: int
2910 :param size:
2911 The number of bytes to be uploaded (which will be read from
2912 ``file_obj``). If not provided, the upload will be concluded once
2913 ``file_obj`` is exhausted.
2914
2915 :type content_type: str
2916 :param content_type: (Optional) Type of content being uploaded.
2917
2918 :type client: :class:`~google.cloud.storage.client.Client`
2919 :param client:
2920 (Optional) The client to use. If not passed, falls back to the
2921 ``client`` stored on the blob's bucket.
2922
2923 :type predefined_acl: str
2924 :param predefined_acl: (Optional) Predefined access control list
2925
2926 :type if_generation_match: long
2927 :param if_generation_match:
2928 (Optional) See :ref:`using-if-generation-match`
2929
2930 :type if_generation_not_match: long
2931 :param if_generation_not_match:
2932 (Optional) See :ref:`using-if-generation-not-match`
2933
2934 :type if_metageneration_match: long
2935 :param if_metageneration_match:
2936 (Optional) See :ref:`using-if-metageneration-match`
2937
2938 :type if_metageneration_not_match: long
2939 :param if_metageneration_not_match:
2940 (Optional) See :ref:`using-if-metageneration-not-match`
2941
2942 :type timeout: float or tuple
2943 :param timeout:
2944 (Optional) The amount of time, in seconds, to wait
2945 for the server response. See: :ref:`configuring_timeouts`
2946
2947 :type checksum: str
2948 :param checksum:
2949 (Optional) The type of checksum to compute to verify
2950 the integrity of the object. If the upload is completed in a single
2951 request, the checksum will be entirely precomputed and the remote
2952 server will handle verification and error handling. If the upload
2953 is too large and must be transmitted in multiple requests, the
2954 checksum will be incrementally computed and the client will handle
2955 verification and error handling, raising
2956 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2957 attempting to delete the corrupted file. Supported values are
2958 "md5", "crc32c", "auto" and None. The default is "auto", which will
2959 try to detect if the C extension for crc32c is installed and fall
2960 back to md5 otherwise.
2961
2962 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2963 :param retry: (Optional) How to retry the RPC. A None value will disable
2964 retries. A google.api_core.retry.Retry value will enable retries,
2965 and the object will define retriable response codes and errors and
2966 configure backoff and timeout options.
2967
2968 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2969 Retry object and activates it only if certain conditions are met.
2970 This class exists to provide safe defaults for RPC calls that are
2971 not technically safe to retry normally (due to potential data
2972 duplication or other side-effects) but become safe to retry if a
2973 condition such as if_generation_match is set.
2974
2975 See the retry.py source code and docstrings in this package
2976 (google.cloud.storage.retry) for information on retry types and how
2977 to configure them.
2978
2979 :type crc32c_checksum_value: str
2980 :param crc32c_checksum_value: (Optional) This should be the checksum of
2981 the entire contents of `file_obj`. Applicable while uploading object
2982 greater than `_MAX_MULTIPART_SIZE` bytes.
2983
2984 It can be obtained by running
2985
2986 `gcloud storage hash /path/to/your/file`
2987
2988 or
2989
2990 .. code-block:: python
2991
2992 import google_crc32c
2993 import base64
2994
2995 data = b"Hello, world!"
2996 crc32c_int = google_crc32c.value(data)
2997 crc32c_hex = f"{crc32c_int:08x}"
2998 crc32c_bytes = crc32c_int.to_bytes(4, "big")
2999 base64_encoded = base64.b64encode(crc32c_bytes)
3000 crc32c_base64 = base64_encoded.decode("utf-8")
3001
3002 print(crc32c_base64)
3003
3004 Above code block prints 8 char string of base64 encoded big-endian
3005 bytes of 32 bit CRC32c integer.
3006
3007 More details on CRC32c can be found in Appendix B:
3008 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
3009 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
3010
3011 :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
3012 if the upload response returns an error status.
3013 """
3014 with create_trace_span(name="Storage.Blob.uploadFromFile"):
3015 self._prep_and_do_upload(
3016 file_obj,
3017 rewind=rewind,
3018 size=size,
3019 content_type=content_type,
3020 client=client,
3021 predefined_acl=predefined_acl,
3022 if_generation_match=if_generation_match,
3023 if_generation_not_match=if_generation_not_match,
3024 if_metageneration_match=if_metageneration_match,
3025 if_metageneration_not_match=if_metageneration_not_match,
3026 timeout=timeout,
3027 checksum=checksum,
3028 retry=retry,
3029 crc32c_checksum_value=crc32c_checksum_value,
3030 )
3031
3032 def _handle_filename_and_upload(self, filename, content_type=None, *args, **kwargs):
3033 """Upload this blob's contents from the content of a named file.
3034
3035 :type filename: str
3036 :param filename: The path to the file.
3037
3038 :type content_type: str
3039 :param content_type: (Optional) Type of content being uploaded.
3040
3041 For *args and **kwargs, refer to the documentation for upload_from_filename() for more information.
3042 """
3043
3044 content_type = self._get_content_type(content_type, filename=filename)
3045
3046 with open(filename, "rb") as file_obj:
3047 total_bytes = os.fstat(file_obj.fileno()).st_size
3048 self._prep_and_do_upload(
3049 file_obj,
3050 content_type=content_type,
3051 size=total_bytes,
3052 *args,
3053 **kwargs,
3054 )
3055
3056 def upload_from_filename(
3057 self,
3058 filename,
3059 content_type=None,
3060 client=None,
3061 predefined_acl=None,
3062 if_generation_match=None,
3063 if_generation_not_match=None,
3064 if_metageneration_match=None,
3065 if_metageneration_not_match=None,
3066 timeout=_DEFAULT_TIMEOUT,
3067 checksum="auto",
3068 retry=DEFAULT_RETRY,
3069 crc32c_checksum_value=None,
3070 ):
3071 """Upload this blob's contents from the content of a named file.
3072
3073 The content type of the upload will be determined in order
3074 of precedence:
3075
3076 - The value passed in to this method (if not :data:`None`)
3077 - The value stored on the current blob
3078 - The value given by ``mimetypes.guess_type``
3079 - The default value ('application/octet-stream')
3080
3081 .. note::
3082 The effect of uploading to an existing blob depends on the
3083 "versioning" and "lifecycle" policies defined on the blob's
3084 bucket. In the absence of those policies, upload will
3085 overwrite any existing contents.
3086
3087 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3088 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3089 API documents for details.
3090
3091 If :attr:`user_project` is set on the bucket, bills the API request
3092 to that project.
3093
3094 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-upload-encrypted-file#storage_upload_encrypted_file-python)
3095 to upload a file with a
3096 [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).
3097
3098 :type filename: str
3099 :param filename: The path to the file.
3100
3101 :type content_type: str
3102 :param content_type: (Optional) Type of content being uploaded.
3103
3104 :type client: :class:`~google.cloud.storage.client.Client`
3105 :param client:
3106 (Optional) The client to use. If not passed, falls back to the
3107 ``client`` stored on the blob's bucket.
3108
3109 :type predefined_acl: str
3110 :param predefined_acl: (Optional) Predefined access control list
3111
3112 :type if_generation_match: long
3113 :param if_generation_match:
3114 (Optional) See :ref:`using-if-generation-match`
3115
3116 :type if_generation_not_match: long
3117 :param if_generation_not_match:
3118 (Optional) See :ref:`using-if-generation-not-match`
3119
3120 :type if_metageneration_match: long
3121 :param if_metageneration_match:
3122 (Optional) See :ref:`using-if-metageneration-match`
3123
3124 :type if_metageneration_not_match: long
3125 :param if_metageneration_not_match:
3126 (Optional) See :ref:`using-if-metageneration-not-match`
3127
3128 :type timeout: float or tuple
3129 :param timeout:
3130 (Optional) The amount of time, in seconds, to wait
3131 for the server response. See: :ref:`configuring_timeouts`
3132
3133 :type checksum: str
3134 :param checksum:
3135 (Optional) The type of checksum to compute to verify
3136 the integrity of the object. If the upload is completed in a single
3137 request, the checksum will be entirely precomputed and the remote
3138 server will handle verification and error handling. If the upload
3139 is too large and must be transmitted in multiple requests, the
3140 checksum will be incrementally computed and the client will handle
3141 verification and error handling, raising
3142 google.cloud.storage.exceptions.DataCorruption on a mismatch and
3143 attempting to delete the corrupted file. Supported values are
3144 "md5", "crc32c", "auto" and None. The default is "auto", which will
3145 try to detect if the C extension for crc32c is installed and fall
3146 back to md5 otherwise.
3147
3148 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3149 :param retry: (Optional) How to retry the RPC. A None value will disable
3150 retries. A google.api_core.retry.Retry value will enable retries,
3151 and the object will define retriable response codes and errors and
3152 configure backoff and timeout options.
3153
3154 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3155 Retry object and activates it only if certain conditions are met.
3156 This class exists to provide safe defaults for RPC calls that are
3157 not technically safe to retry normally (due to potential data
3158 duplication or other side-effects) but become safe to retry if a
3159 condition such as if_generation_match is set.
3160
3161 See the retry.py source code and docstrings in this package
3162 (google.cloud.storage.retry) for information on retry types and how
3163 to configure them.
3164
3165 :type crc32c_checksum_value: str
3166 :param crc32c_checksum_value: (Optional) This should be the checksum of
3167 the entire contents of `filename`. Applicable while uploading object
3168 greater than `_MAX_MULTIPART_SIZE` bytes.
3169
3170 It can be obtained by running
3171
3172 `gcloud storage hash /path/to/your/file`
3173
3174 or
3175
3176 .. code-block:: python
3177
3178 import google_crc32c
3179 import base64
3180
3181 data = b"Hello, world!"
3182 crc32c_int = google_crc32c.value(data)
3183 crc32c_hex = f"{crc32c_int:08x}"
3184 crc32c_bytes = crc32c_int.to_bytes(4, "big")
3185 base64_encoded = base64.b64encode(crc32c_bytes)
3186 crc32c_base64 = base64_encoded.decode("utf-8")
3187
3188 print(crc32c_base64)
3189
3190 Above code block prints 8 char string of base64 encoded big-endian
3191 bytes of 32 bit CRC32c integer.
3192
3193 More details on CRC32c can be found in Appendix B:
3194 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
3195 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
3196 """
3197 with create_trace_span(name="Storage.Blob.uploadFromFilename"):
3198 self._handle_filename_and_upload(
3199 filename,
3200 content_type=content_type,
3201 client=client,
3202 predefined_acl=predefined_acl,
3203 if_generation_match=if_generation_match,
3204 if_generation_not_match=if_generation_not_match,
3205 if_metageneration_match=if_metageneration_match,
3206 if_metageneration_not_match=if_metageneration_not_match,
3207 timeout=timeout,
3208 checksum=checksum,
3209 retry=retry,
3210 crc32c_checksum_value=crc32c_checksum_value,
3211 )
3212
3213 def upload_from_string(
3214 self,
3215 data,
3216 content_type="text/plain",
3217 client=None,
3218 predefined_acl=None,
3219 if_generation_match=None,
3220 if_generation_not_match=None,
3221 if_metageneration_match=None,
3222 if_metageneration_not_match=None,
3223 timeout=_DEFAULT_TIMEOUT,
3224 checksum="auto",
3225 retry=DEFAULT_RETRY,
3226 crc32c_checksum_value=None,
3227 ):
3228 """Upload contents of this blob from the provided string.
3229
3230 .. note::
3231 The effect of uploading to an existing blob depends on the
3232 "versioning" and "lifecycle" policies defined on the blob's
3233 bucket. In the absence of those policies, upload will
3234 overwrite any existing contents.
3235
3236 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3237 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3238 API documents for details.
3239
3240 If :attr:`user_project` is set on the bucket, bills the API request
3241 to that project.
3242
3243 :type data: bytes or str
3244 :param data:
3245 The data to store in this blob. If the value is text, it will be
3246 encoded as UTF-8.
3247
3248 :type content_type: str
3249 :param content_type:
3250 (Optional) Type of content being uploaded. Defaults to
3251 ``'text/plain'``.
3252
3253 :type client: :class:`~google.cloud.storage.client.Client`
3254 :param client:
3255 (Optional) The client to use. If not passed, falls back to the
3256 ``client`` stored on the blob's bucket.
3257
3258 :type predefined_acl: str
3259 :param predefined_acl: (Optional) Predefined access control list
3260
3261 :type if_generation_match: long
3262 :param if_generation_match:
3263 (Optional) See :ref:`using-if-generation-match`
3264
3265 :type if_generation_not_match: long
3266 :param if_generation_not_match:
3267 (Optional) See :ref:`using-if-generation-not-match`
3268
3269 :type if_metageneration_match: long
3270 :param if_metageneration_match:
3271 (Optional) See :ref:`using-if-metageneration-match`
3272
3273 :type if_metageneration_not_match: long
3274 :param if_metageneration_not_match:
3275 (Optional) See :ref:`using-if-metageneration-not-match`
3276
3277 :type timeout: float or tuple
3278 :param timeout:
3279 (Optional) The amount of time, in seconds, to wait
3280 for the server response. See: :ref:`configuring_timeouts`
3281
3282 :type checksum: str
3283 :param checksum:
3284 (Optional) The type of checksum to compute to verify
3285 the integrity of the object. If the upload is completed in a single
3286 request, the checksum will be entirely precomputed and the remote
3287 server will handle verification and error handling. If the upload
3288 is too large and must be transmitted in multiple requests, the
3289 checksum will be incrementally computed and the client will handle
3290 verification and error handling, raising
3291 google.cloud.storage.exceptions.DataCorruption on a mismatch and
3292 attempting to delete the corrupted file. Supported values are
3293 "md5", "crc32c", "auto" and None. The default is "auto", which will
3294 try to detect if the C extension for crc32c is installed and fall
3295 back to md5 otherwise.
3296
3297 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3298 :param retry: (Optional) How to retry the RPC. A None value will disable
3299 retries. A google.api_core.retry.Retry value will enable retries,
3300 and the object will define retriable response codes and errors and
3301 configure backoff and timeout options.
3302
3303 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3304 Retry object and activates it only if certain conditions are met.
3305 This class exists to provide safe defaults for RPC calls that are
3306 not technically safe to retry normally (due to potential data
3307 duplication or other side-effects) but become safe to retry if a
3308 condition such as if_generation_match is set.
3309
3310 See the retry.py source code and docstrings in this package
3311 (google.cloud.storage.retry) for information on retry types and how
3312 to configure them.
3313
3314 :type crc32c_checksum_value: str
3315 :param crc32c_checksum_value: (Optional) This should be the checksum of
3316 the entire contents of `file_obj`. Applicable while uploading object
3317 greater than `_MAX_MULTIPART_SIZE` bytes.
3318
3319 It can be obtained by running
3320
3321 `gcloud storage hash /path/to/your/file`
3322
3323 or
3324
3325 .. code-block:: python
3326
3327 import google_crc32c
3328 import base64
3329
3330 data = b"Hello, world!"
3331 crc32c_int = google_crc32c.value(data)
3332 crc32c_hex = f"{crc32c_int:08x}"
3333 crc32c_bytes = crc32c_int.to_bytes(4, "big")
3334 base64_encoded = base64.b64encode(crc32c_bytes)
3335 crc32c_base64 = base64_encoded.decode("utf-8")
3336
3337 print(crc32c_base64)
3338
3339 Above code block prints 8 char string of base64 encoded big-endian
3340 bytes of 32 bit CRC32c integer.
3341
3342 More details on CRC32c can be found in Appendix B:
3343 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
3344 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
3345 """
3346 with create_trace_span(name="Storage.Blob.uploadFromString"):
3347 data = _to_bytes(data, encoding="utf-8")
3348 string_buffer = BytesIO(data)
3349 self.upload_from_file(
3350 file_obj=string_buffer,
3351 size=len(data),
3352 content_type=content_type,
3353 client=client,
3354 predefined_acl=predefined_acl,
3355 if_generation_match=if_generation_match,
3356 if_generation_not_match=if_generation_not_match,
3357 if_metageneration_match=if_metageneration_match,
3358 if_metageneration_not_match=if_metageneration_not_match,
3359 timeout=timeout,
3360 checksum=checksum,
3361 retry=retry,
3362 crc32c_checksum_value=crc32c_checksum_value,
3363 )
3364
3365 def create_resumable_upload_session(
3366 self,
3367 content_type=None,
3368 size=None,
3369 origin=None,
3370 client=None,
3371 timeout=_DEFAULT_TIMEOUT,
3372 checksum="auto",
3373 predefined_acl=None,
3374 if_generation_match=None,
3375 if_generation_not_match=None,
3376 if_metageneration_match=None,
3377 if_metageneration_not_match=None,
3378 retry=DEFAULT_RETRY,
3379 ):
3380 """Create a resumable upload session.
3381
3382 Resumable upload sessions allow you to start an upload session from
3383 one client and complete the session in another. This method is called
3384 by the initiator to set the metadata and limits. The initiator then
3385 passes the session URL to the client that will upload the binary data.
3386 The client performs a PUT request on the session URL to complete the
3387 upload. This process allows untrusted clients to upload to an
3388 access-controlled bucket.
3389
3390 For more details, see the
3391 documentation on [`signed URLs`](https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable).
3392
3393 The content type of the upload will be determined in order
3394 of precedence:
3395
3396 - The value passed in to this method (if not :data:`None`)
3397 - The value stored on the current blob
3398 - The default value ('application/octet-stream')
3399
3400 .. note::
3401 The effect of uploading to an existing blob depends on the
3402 "versioning" and "lifecycle" policies defined on the blob's
3403 bucket. In the absence of those policies, upload will
3404 overwrite any existing contents.
3405
3406 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3407 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3408 API documents for details.
3409
3410 If :attr:`encryption_key` is set, the blob will be encrypted with
3411 a [`customer-supplied`](https://cloud.google.com/storage/docs/encryption#customer-supplied)
3412 encryption key.
3413
3414 If :attr:`user_project` is set on the bucket, bills the API request
3415 to that project.
3416
3417 :type size: int
3418 :param size:
3419 (Optional) The maximum number of bytes that can be uploaded using
3420 this session. If the size is not known when creating the session,
3421 this should be left blank.
3422
3423 :type content_type: str
3424 :param content_type: (Optional) Type of content being uploaded.
3425
3426 :type origin: str
3427 :param origin:
3428 (Optional) If set, the upload can only be completed by a user-agent
3429 that uploads from the given origin. This can be useful when passing
3430 the session to a web client.
3431
3432 :type client: :class:`~google.cloud.storage.client.Client`
3433 :param client:
3434 (Optional) The client to use. If not passed, falls back to the
3435 ``client`` stored on the blob's bucket.
3436
3437 :type timeout: float or tuple
3438 :param timeout:
3439 (Optional) The amount of time, in seconds, to wait
3440 for the server response. See: :ref:`configuring_timeouts`
3441
3442 :type checksum: str
3443 :param checksum:
3444 (Optional) The type of checksum to compute to verify
3445 the integrity of the object. After the upload is complete, the
3446 server-computed checksum of the resulting object will be checked
3447 and google.cloud.storage.exceptions.DataCorruption will be raised on
3448 a mismatch. On a validation failure, the client will attempt to
3449 delete the uploaded object automatically. Supported values are
3450 "md5", "crc32c", "auto" and None. The default is "auto", which will
3451 try to detect if the C extension for crc32c is installed and fall
3452 back to md5 otherwise.
3453
3454 :type predefined_acl: str
3455 :param predefined_acl: (Optional) Predefined access control list
3456
3457 :type if_generation_match: long
3458 :param if_generation_match:
3459 (Optional) See :ref:`using-if-generation-match`
3460
3461 :type if_generation_not_match: long
3462 :param if_generation_not_match:
3463 (Optional) See :ref:`using-if-generation-not-match`
3464
3465 :type if_metageneration_match: long
3466 :param if_metageneration_match:
3467 (Optional) See :ref:`using-if-metageneration-match`
3468
3469 :type if_metageneration_not_match: long
3470 :param if_metageneration_not_match:
3471 (Optional) See :ref:`using-if-metageneration-not-match`
3472
3473 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3474 :param retry: (Optional) How to retry the RPC. A None value will disable
3475 retries. A google.api_core.retry.Retry value will enable retries,
3476 and the object will define retriable response codes and errors and
3477 configure backoff and timeout options.
3478
3479 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3480 Retry object and activates it only if certain conditions are met.
3481 This class exists to provide safe defaults for RPC calls that are
3482 not technically safe to retry normally (due to potential data
3483 duplication or other side-effects) but become safe to retry if a
3484 condition such as if_generation_match is set.
3485
3486 See the retry.py source code and docstrings in this package
3487 (google.cloud.storage.retry) for information on retry types and how
3488 to configure them.
3489
3490 :rtype: str
3491 :returns: The resumable upload session URL. The upload can be
3492 completed by making an HTTP PUT request with the
3493 file's contents.
3494
3495 :raises: :class:`google.cloud.exceptions.GoogleCloudError`
3496 if the session creation response returns an error status.
3497 """
3498 with create_trace_span(name="Storage.Blob.createResumableUploadSession"):
3499 # Handle ConditionalRetryPolicy.
3500 if isinstance(retry, ConditionalRetryPolicy):
3501 # Conditional retries are designed for non-media calls, which change
3502 # arguments into query_params dictionaries. Media operations work
3503 # differently, so here we make a "fake" query_params to feed to the
3504 # ConditionalRetryPolicy.
3505 query_params = {
3506 "ifGenerationMatch": if_generation_match,
3507 "ifMetagenerationMatch": if_metageneration_match,
3508 }
3509 retry = retry.get_retry_policy_if_conditions_met(
3510 query_params=query_params
3511 )
3512
3513 extra_headers = {}
3514 if origin is not None:
3515 # This header is specifically for client-side uploads, it
3516 # determines the origins allowed for CORS.
3517 extra_headers["Origin"] = origin
3518
3519 try:
3520 fake_stream = BytesIO(b"")
3521 # Send a fake the chunk size which we **know** will be acceptable
3522 # to the `ResumableUpload` constructor. The chunk size only
3523 # matters when **sending** bytes to an upload.
3524 upload, _ = self._initiate_resumable_upload(
3525 client,
3526 fake_stream,
3527 content_type,
3528 size,
3529 predefined_acl=predefined_acl,
3530 if_generation_match=if_generation_match,
3531 if_generation_not_match=if_generation_not_match,
3532 if_metageneration_match=if_metageneration_match,
3533 if_metageneration_not_match=if_metageneration_not_match,
3534 extra_headers=extra_headers,
3535 chunk_size=self._CHUNK_SIZE_MULTIPLE,
3536 timeout=timeout,
3537 checksum=checksum,
3538 retry=retry,
3539 )
3540
3541 return upload.resumable_url
3542 except InvalidResponse as exc:
3543 _raise_from_invalid_response(exc)
3544
3545 def get_iam_policy(
3546 self,
3547 client=None,
3548 requested_policy_version=None,
3549 timeout=_DEFAULT_TIMEOUT,
3550 retry=DEFAULT_RETRY,
3551 ):
3552 """Retrieve the IAM policy for the object.
3553
3554 .. note::
3555
3556 Blob- / object-level IAM support does not yet exist and methods
3557 currently call an internal ACL backend not providing any utility
3558 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3559 in the future and is currently undocumented. Use :attr:`acl` for
3560 managing object access control.
3561
3562 If :attr:`user_project` is set on the bucket, bills the API request
3563 to that project.
3564
3565 :type client: :class:`~google.cloud.storage.client.Client`
3566 :param client:
3567 (Optional) The client to use. If not passed, falls back to the
3568 ``client`` stored on the current object's bucket.
3569
3570 :type requested_policy_version: int or ``NoneType``
3571 :param requested_policy_version:
3572 (Optional) The version of IAM policies to request. If a policy
3573 with a condition is requested without setting this, the server will
3574 return an error. This must be set to a value of 3 to retrieve IAM
3575 policies containing conditions. This is to prevent client code that
3576 isn't aware of IAM conditions from interpreting and modifying
3577 policies incorrectly. The service might return a policy with
3578 version lower than the one that was requested, based on the feature
3579 syntax in the policy fetched.
3580
3581 :type timeout: float or tuple
3582 :param timeout:
3583 (Optional) The amount of time, in seconds, to wait
3584 for the server response. See: :ref:`configuring_timeouts`
3585
3586 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3587 :param retry:
3588 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3589
3590 :rtype: :class:`google.api_core.iam.Policy`
3591 :returns: the policy instance, based on the resource returned from
3592 the ``getIamPolicy`` API request.
3593 """
3594 with create_trace_span(name="Storage.Blob.getIamPolicy"):
3595 client = self._require_client(client)
3596
3597 query_params = {}
3598
3599 if self.user_project is not None:
3600 query_params["userProject"] = self.user_project
3601
3602 if requested_policy_version is not None:
3603 query_params["optionsRequestedPolicyVersion"] = requested_policy_version
3604
3605 info = client._get_resource(
3606 f"{self.path}/iam",
3607 query_params=query_params,
3608 timeout=timeout,
3609 retry=retry,
3610 _target_object=None,
3611 )
3612 return Policy.from_api_repr(info)
3613
3614 def set_iam_policy(
3615 self,
3616 policy,
3617 client=None,
3618 timeout=_DEFAULT_TIMEOUT,
3619 retry=DEFAULT_RETRY_IF_ETAG_IN_JSON,
3620 ):
3621 """Update the IAM policy for the bucket.
3622
3623 .. note::
3624
3625 Blob- / object-level IAM support does not yet exist and methods
3626 currently call an internal ACL backend not providing any utility
3627 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3628 in the future and is currently undocumented. Use :attr:`acl` for
3629 managing object access control.
3630
3631 If :attr:`user_project` is set on the bucket, bills the API request
3632 to that project.
3633
3634 :type policy: :class:`google.api_core.iam.Policy`
3635 :param policy: policy instance used to update bucket's IAM policy.
3636
3637 :type client: :class:`~google.cloud.storage.client.Client`
3638 :param client:
3639 (Optional) The client to use. If not passed, falls back to the
3640 ``client`` stored on the current bucket.
3641
3642 :type timeout: float or tuple
3643 :param timeout:
3644 (Optional) The amount of time, in seconds, to wait
3645 for the server response. See: :ref:`configuring_timeouts`
3646
3647 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3648 :param retry:
3649 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3650
3651 :rtype: :class:`google.api_core.iam.Policy`
3652 :returns: the policy instance, based on the resource returned from
3653 the ``setIamPolicy`` API request.
3654 """
3655 with create_trace_span(name="Storage.Blob.setIamPolicy"):
3656 client = self._require_client(client)
3657
3658 query_params = {}
3659
3660 if self.user_project is not None:
3661 query_params["userProject"] = self.user_project
3662
3663 path = f"{self.path}/iam"
3664 resource = policy.to_api_repr()
3665 resource["resourceId"] = self.path
3666 info = client._put_resource(
3667 path,
3668 resource,
3669 query_params=query_params,
3670 timeout=timeout,
3671 retry=retry,
3672 _target_object=None,
3673 )
3674 return Policy.from_api_repr(info)
3675
3676 def test_iam_permissions(
3677 self,
3678 permissions,
3679 client=None,
3680 timeout=_DEFAULT_TIMEOUT,
3681 retry=DEFAULT_RETRY,
3682 ):
3683 """API call: test permissions
3684
3685 .. note::
3686
3687 Blob- / object-level IAM support does not yet exist and methods
3688 currently call an internal ACL backend not providing any utility
3689 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3690 in the future and is currently undocumented. Use :attr:`acl` for
3691 managing object access control.
3692
3693 If :attr:`user_project` is set on the bucket, bills the API request
3694 to that project.
3695
3696 :type permissions: list of string
3697 :param permissions: the permissions to check
3698
3699 :type client: :class:`~google.cloud.storage.client.Client`
3700 :param client:
3701 (Optional) The client to use. If not passed, falls back to the
3702 ``client`` stored on the current bucket.
3703
3704 :type timeout: float or tuple
3705 :param timeout:
3706 (Optional) The amount of time, in seconds, to wait
3707 for the server response. See: :ref:`configuring_timeouts`
3708
3709 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3710 :param retry:
3711 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3712
3713 :rtype: list of string
3714 :returns: the permissions returned by the ``testIamPermissions`` API
3715 request.
3716 """
3717 with create_trace_span(name="Storage.Blob.testIamPermissions"):
3718 client = self._require_client(client)
3719 query_params = {"permissions": permissions}
3720
3721 if self.user_project is not None:
3722 query_params["userProject"] = self.user_project
3723
3724 path = f"{self.path}/iam/testPermissions"
3725 resp = client._get_resource(
3726 path,
3727 query_params=query_params,
3728 timeout=timeout,
3729 retry=retry,
3730 _target_object=None,
3731 )
3732
3733 return resp.get("permissions", [])
3734
3735 def make_public(
3736 self,
3737 client=None,
3738 timeout=_DEFAULT_TIMEOUT,
3739 if_generation_match=None,
3740 if_generation_not_match=None,
3741 if_metageneration_match=None,
3742 if_metageneration_not_match=None,
3743 retry=DEFAULT_RETRY,
3744 ):
3745 """Update blob's ACL, granting read access to anonymous users.
3746
3747 :type client: :class:`~google.cloud.storage.client.Client` or
3748 ``NoneType``
3749 :param client: (Optional) The client to use. If not passed, falls back
3750 to the ``client`` stored on the blob's bucket.
3751
3752 :type timeout: float or tuple
3753 :param timeout:
3754 (Optional) The amount of time, in seconds, to wait
3755 for the server response. See: :ref:`configuring_timeouts`
3756
3757 :type if_generation_match: long
3758 :param if_generation_match:
3759 (Optional) See :ref:`using-if-generation-match`
3760
3761 :type if_generation_not_match: long
3762 :param if_generation_not_match:
3763 (Optional) See :ref:`using-if-generation-not-match`
3764
3765 :type if_metageneration_match: long
3766 :param if_metageneration_match:
3767 (Optional) See :ref:`using-if-metageneration-match`
3768
3769 :type if_metageneration_not_match: long
3770 :param if_metageneration_not_match:
3771 (Optional) See :ref:`using-if-metageneration-not-match`
3772
3773 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3774 :param retry:
3775 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3776 """
3777 with create_trace_span(name="Storage.Blob.makePublic"):
3778 self.acl.all().grant_read()
3779 self.acl.save(
3780 client=client,
3781 timeout=timeout,
3782 if_generation_match=if_generation_match,
3783 if_generation_not_match=if_generation_not_match,
3784 if_metageneration_match=if_metageneration_match,
3785 if_metageneration_not_match=if_metageneration_not_match,
3786 retry=retry,
3787 )
3788
3789 def make_private(
3790 self,
3791 client=None,
3792 timeout=_DEFAULT_TIMEOUT,
3793 if_generation_match=None,
3794 if_generation_not_match=None,
3795 if_metageneration_match=None,
3796 if_metageneration_not_match=None,
3797 retry=DEFAULT_RETRY,
3798 ):
3799 """Update blob's ACL, revoking read access for anonymous users.
3800
3801 :type client: :class:`~google.cloud.storage.client.Client` or
3802 ``NoneType``
3803 :param client: (Optional) The client to use. If not passed, falls back
3804 to the ``client`` stored on the blob's bucket.
3805
3806 :type timeout: float or tuple
3807 :param timeout:
3808 (Optional) The amount of time, in seconds, to wait
3809 for the server response. See: :ref:`configuring_timeouts`
3810
3811 :type if_generation_match: long
3812 :param if_generation_match:
3813 (Optional) See :ref:`using-if-generation-match`
3814
3815 :type if_generation_not_match: long
3816 :param if_generation_not_match:
3817 (Optional) See :ref:`using-if-generation-not-match`
3818
3819 :type if_metageneration_match: long
3820 :param if_metageneration_match:
3821 (Optional) See :ref:`using-if-metageneration-match`
3822
3823 :type if_metageneration_not_match: long
3824 :param if_metageneration_not_match:
3825 (Optional) See :ref:`using-if-metageneration-not-match`
3826
3827 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3828 :param retry:
3829 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3830 """
3831 with create_trace_span(name="Storage.Blob.makePrivate"):
3832 self.acl.all().revoke_read()
3833 self.acl.save(
3834 client=client,
3835 timeout=timeout,
3836 if_generation_match=if_generation_match,
3837 if_generation_not_match=if_generation_not_match,
3838 if_metageneration_match=if_metageneration_match,
3839 if_metageneration_not_match=if_metageneration_not_match,
3840 retry=retry,
3841 )
3842
3843 def compose(
3844 self,
3845 sources,
3846 client=None,
3847 timeout=_DEFAULT_TIMEOUT,
3848 if_generation_match=None,
3849 if_metageneration_match=None,
3850 if_source_generation_match=None,
3851 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3852 ):
3853 """Concatenate source blobs into this one.
3854
3855 If :attr:`user_project` is set on the bucket, bills the API request
3856 to that project.
3857
3858 See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/objects/compose)
3859 and a [code sample](https://cloud.google.com/storage/docs/samples/storage-compose-file#storage_compose_file-python).
3860
3861 :type sources: list of :class:`Blob`
3862 :param sources: Blobs whose contents will be composed into this blob.
3863
3864 :type client: :class:`~google.cloud.storage.client.Client`
3865 :param client:
3866 (Optional) The client to use. If not passed, falls back to the
3867 ``client`` stored on the blob's bucket.
3868
3869 :type timeout: float or tuple
3870 :param timeout:
3871 (Optional) The amount of time, in seconds, to wait
3872 for the server response. See: :ref:`configuring_timeouts`
3873
3874 :type if_generation_match: long
3875 :param if_generation_match:
3876 (Optional) Makes the operation conditional on whether the
3877 destination object's current generation matches the given value.
3878 Setting to 0 makes the operation succeed only if there are no live
3879 versions of the object.
3880 Note: In a previous version, this argument worked identically to the
3881 ``if_source_generation_match`` argument. For
3882 backwards-compatibility reasons, if a list is passed in,
3883 this argument will behave like ``if_source_generation_match``
3884 and also issue a DeprecationWarning.
3885
3886 :type if_metageneration_match: long
3887 :param if_metageneration_match:
3888 (Optional) Makes the operation conditional on whether the
3889 destination object's current metageneration matches the given
3890 value.
3891
3892 If a list of long is passed in, no match operation will be
3893 performed. (Deprecated: type(list of long) is supported for
3894 backwards-compatability reasons only.)
3895
3896 :type if_source_generation_match: list of long
3897 :param if_source_generation_match:
3898 (Optional) Makes the operation conditional on whether the current
3899 generation of each source blob matches the corresponding generation.
3900 The list must match ``sources`` item-to-item.
3901
3902 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3903 :param retry:
3904 (Optional) How to retry the RPC.
3905 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
3906 policy which will only enable retries if ``if_generation_match`` or ``generation``
3907 is set, in order to ensure requests are idempotent before retrying them.
3908 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
3909 to enable retries regardless of generation precondition setting.
3910 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
3911 """
3912 with create_trace_span(name="Storage.Blob.compose"):
3913 sources_len = len(sources)
3914 client = self._require_client(client)
3915 query_params = {}
3916
3917 if isinstance(if_generation_match, list):
3918 warnings.warn(
3919 _COMPOSE_IF_GENERATION_LIST_DEPRECATED,
3920 DeprecationWarning,
3921 stacklevel=2,
3922 )
3923
3924 if if_source_generation_match is not None:
3925 raise ValueError(
3926 _COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR
3927 )
3928
3929 if_source_generation_match = if_generation_match
3930 if_generation_match = None
3931
3932 if isinstance(if_metageneration_match, list):
3933 warnings.warn(
3934 _COMPOSE_IF_METAGENERATION_LIST_DEPRECATED,
3935 DeprecationWarning,
3936 stacklevel=2,
3937 )
3938
3939 if_metageneration_match = None
3940
3941 if if_source_generation_match is None:
3942 if_source_generation_match = [None] * sources_len
3943 if len(if_source_generation_match) != sources_len:
3944 raise ValueError(_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR)
3945
3946 source_objects = []
3947 for source, source_generation in zip(sources, if_source_generation_match):
3948 source_object = {
3949 "name": source.name,
3950 "generation": source.generation,
3951 }
3952
3953 preconditions = {}
3954 if source_generation is not None:
3955 preconditions["ifGenerationMatch"] = source_generation
3956
3957 if preconditions:
3958 source_object["objectPreconditions"] = preconditions
3959
3960 source_objects.append(source_object)
3961
3962 request = {
3963 "sourceObjects": source_objects,
3964 "destination": self._properties.copy(),
3965 }
3966
3967 if self.user_project is not None:
3968 query_params["userProject"] = self.user_project
3969
3970 _add_generation_match_parameters(
3971 query_params,
3972 if_generation_match=if_generation_match,
3973 if_metageneration_match=if_metageneration_match,
3974 )
3975
3976 api_response = client._post_resource(
3977 f"{self.path}/compose",
3978 request,
3979 query_params=query_params,
3980 timeout=timeout,
3981 retry=retry,
3982 _target_object=self,
3983 )
3984 self._set_properties(api_response)
3985
3986 def rewrite(
3987 self,
3988 source,
3989 token=None,
3990 client=None,
3991 if_generation_match=None,
3992 if_generation_not_match=None,
3993 if_metageneration_match=None,
3994 if_metageneration_not_match=None,
3995 if_source_generation_match=None,
3996 if_source_generation_not_match=None,
3997 if_source_metageneration_match=None,
3998 if_source_metageneration_not_match=None,
3999 timeout=_DEFAULT_TIMEOUT,
4000 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
4001 ):
4002 """Rewrite source blob into this one.
4003
4004 If :attr:`user_project` is set on the bucket, bills the API request
4005 to that project.
4006
4007 .. note::
4008
4009 ``rewrite`` is not supported in a ``Batch`` context.
4010
4011 :type source: :class:`Blob`
4012 :param source: blob whose contents will be rewritten into this blob.
4013
4014 :type token: str
4015 :param token:
4016 (Optional) Token returned from an earlier, not-completed call to
4017 rewrite the same source blob. If passed, result will include
4018 updated status, total bytes written.
4019
4020 :type client: :class:`~google.cloud.storage.client.Client`
4021 :param client:
4022 (Optional) The client to use. If not passed, falls back to the
4023 ``client`` stored on the blob's bucket.
4024
4025 :type if_generation_match: long
4026 :param if_generation_match:
4027 (Optional) See :ref:`using-if-generation-match`
4028 Note that the generation to be matched is that of the
4029 ``destination`` blob.
4030
4031 :type if_generation_not_match: long
4032 :param if_generation_not_match:
4033 (Optional) See :ref:`using-if-generation-not-match`
4034 Note that the generation to be matched is that of the
4035 ``destination`` blob.
4036
4037 :type if_metageneration_match: long
4038 :param if_metageneration_match:
4039 (Optional) See :ref:`using-if-metageneration-match`
4040 Note that the metageneration to be matched is that of the
4041 ``destination`` blob.
4042
4043 :type if_metageneration_not_match: long
4044 :param if_metageneration_not_match:
4045 (Optional) See :ref:`using-if-metageneration-not-match`
4046 Note that the metageneration to be matched is that of the
4047 ``destination`` blob.
4048
4049 :type if_source_generation_match: long
4050 :param if_source_generation_match:
4051 (Optional) Makes the operation conditional on whether the source
4052 object's generation matches the given value.
4053
4054 :type if_source_generation_not_match: long
4055 :param if_source_generation_not_match:
4056 (Optional) Makes the operation conditional on whether the source
4057 object's generation does not match the given value.
4058
4059 :type if_source_metageneration_match: long
4060 :param if_source_metageneration_match:
4061 (Optional) Makes the operation conditional on whether the source
4062 object's current metageneration matches the given value.
4063
4064 :type if_source_metageneration_not_match: long
4065 :param if_source_metageneration_not_match:
4066 (Optional) Makes the operation conditional on whether the source
4067 object's current metageneration does not match the given value.
4068
4069 :type timeout: float or tuple
4070 :param timeout:
4071 (Optional) The amount of time, in seconds, to wait
4072 for the server response. See: :ref:`configuring_timeouts`
4073
4074 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4075 :param retry:
4076 (Optional) How to retry the RPC.
4077 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
4078 policy which will only enable retries if ``if_generation_match`` or ``generation``
4079 is set, in order to ensure requests are idempotent before retrying them.
4080 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
4081 to enable retries regardless of generation precondition setting.
4082 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
4083
4084 :rtype: tuple
4085 :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
4086 is a rewrite token (``None`` if the rewrite is complete),
4087 ``bytes_rewritten`` is the number of bytes rewritten so far,
4088 and ``total_bytes`` is the total number of bytes to be
4089 rewritten.
4090 """
4091 with create_trace_span(name="Storage.Blob.rewrite"):
4092 client = self._require_client(client)
4093 headers = _get_encryption_headers(self._encryption_key)
4094 headers.update(_get_encryption_headers(source._encryption_key, source=True))
4095
4096 query_params = self._query_params
4097 if "generation" in query_params:
4098 del query_params["generation"]
4099
4100 if token:
4101 query_params["rewriteToken"] = token
4102
4103 if source.generation:
4104 query_params["sourceGeneration"] = source.generation
4105
4106 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
4107 # at rest, object resource metadata will store the version of the Key Management
4108 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
4109 # used to rewrite the object, then the existing kmsKeyName version
4110 # value can't be used in the rewrite request and the client instead ignores it.
4111 if (
4112 self.kms_key_name is not None
4113 and "cryptoKeyVersions" not in self.kms_key_name
4114 ):
4115 query_params["destinationKmsKeyName"] = self.kms_key_name
4116
4117 _add_generation_match_parameters(
4118 query_params,
4119 if_generation_match=if_generation_match,
4120 if_generation_not_match=if_generation_not_match,
4121 if_metageneration_match=if_metageneration_match,
4122 if_metageneration_not_match=if_metageneration_not_match,
4123 if_source_generation_match=if_source_generation_match,
4124 if_source_generation_not_match=if_source_generation_not_match,
4125 if_source_metageneration_match=if_source_metageneration_match,
4126 if_source_metageneration_not_match=if_source_metageneration_not_match,
4127 )
4128
4129 path = f"{source.path}/rewriteTo{self.path}"
4130 api_response = client._post_resource(
4131 path,
4132 self._properties,
4133 query_params=query_params,
4134 headers=headers,
4135 timeout=timeout,
4136 retry=retry,
4137 _target_object=self,
4138 )
4139 rewritten = int(api_response["totalBytesRewritten"])
4140 size = int(api_response["objectSize"])
4141
4142 # The resource key is set if and only if the API response is
4143 # completely done. Additionally, there is no rewrite token to return
4144 # in this case.
4145 if api_response["done"]:
4146 self._set_properties(api_response["resource"])
4147 return None, rewritten, size
4148
4149 return api_response["rewriteToken"], rewritten, size
4150
4151 def update_storage_class(
4152 self,
4153 new_class,
4154 client=None,
4155 if_generation_match=None,
4156 if_generation_not_match=None,
4157 if_metageneration_match=None,
4158 if_metageneration_not_match=None,
4159 if_source_generation_match=None,
4160 if_source_generation_not_match=None,
4161 if_source_metageneration_match=None,
4162 if_source_metageneration_not_match=None,
4163 timeout=_DEFAULT_TIMEOUT,
4164 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
4165 ):
4166 """Update blob's storage class via a rewrite-in-place. This helper will
4167 wait for the rewrite to complete before returning, so it may take some
4168 time for large files.
4169
4170 See
4171 https://cloud.google.com/storage/docs/per-object-storage-class
4172
4173 If :attr:`user_project` is set on the bucket, bills the API request
4174 to that project.
4175
4176 :type new_class: str
4177 :param new_class:
4178 new storage class for the object. One of:
4179 :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
4180 :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
4181 :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
4182 :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
4183 :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
4184 or
4185 :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`.
4186
4187 :type client: :class:`~google.cloud.storage.client.Client`
4188 :param client:
4189 (Optional) The client to use. If not passed, falls back to the
4190 ``client`` stored on the blob's bucket.
4191
4192 :type if_generation_match: long
4193 :param if_generation_match:
4194 (Optional) See :ref:`using-if-generation-match`
4195 Note that the generation to be matched is that of the
4196 ``destination`` blob.
4197
4198 :type if_generation_not_match: long
4199 :param if_generation_not_match:
4200 (Optional) See :ref:`using-if-generation-not-match`
4201 Note that the generation to be matched is that of the
4202 ``destination`` blob.
4203
4204 :type if_metageneration_match: long
4205 :param if_metageneration_match:
4206 (Optional) See :ref:`using-if-metageneration-match`
4207 Note that the metageneration to be matched is that of the
4208 ``destination`` blob.
4209
4210 :type if_metageneration_not_match: long
4211 :param if_metageneration_not_match:
4212 (Optional) See :ref:`using-if-metageneration-not-match`
4213 Note that the metageneration to be matched is that of the
4214 ``destination`` blob.
4215
4216 :type if_source_generation_match: long
4217 :param if_source_generation_match:
4218 (Optional) Makes the operation conditional on whether the source
4219 object's generation matches the given value.
4220
4221 :type if_source_generation_not_match: long
4222 :param if_source_generation_not_match:
4223 (Optional) Makes the operation conditional on whether the source
4224 object's generation does not match the given value.
4225
4226 :type if_source_metageneration_match: long
4227 :param if_source_metageneration_match:
4228 (Optional) Makes the operation conditional on whether the source
4229 object's current metageneration matches the given value.
4230
4231 :type if_source_metageneration_not_match: long
4232 :param if_source_metageneration_not_match:
4233 (Optional) Makes the operation conditional on whether the source
4234 object's current metageneration does not match the given value.
4235
4236 :type timeout: float or tuple
4237 :param timeout:
4238 (Optional) The amount of time, in seconds, to wait
4239 for the server response. See: :ref:`configuring_timeouts`
4240
4241 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4242 :param retry:
4243 (Optional) How to retry the RPC.
4244 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
4245 policy which will only enable retries if ``if_generation_match`` or ``generation``
4246 is set, in order to ensure requests are idempotent before retrying them.
4247 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
4248 to enable retries regardless of generation precondition setting.
4249 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
4250 """
4251 with create_trace_span(name="Storage.Blob.updateStorageClass"):
4252 # Update current blob's storage class prior to rewrite
4253 self._patch_property("storageClass", new_class)
4254
4255 # Execute consecutive rewrite operations until operation is done
4256 token, _, _ = self.rewrite(
4257 self,
4258 if_generation_match=if_generation_match,
4259 if_generation_not_match=if_generation_not_match,
4260 if_metageneration_match=if_metageneration_match,
4261 if_metageneration_not_match=if_metageneration_not_match,
4262 if_source_generation_match=if_source_generation_match,
4263 if_source_generation_not_match=if_source_generation_not_match,
4264 if_source_metageneration_match=if_source_metageneration_match,
4265 if_source_metageneration_not_match=if_source_metageneration_not_match,
4266 timeout=timeout,
4267 retry=retry,
4268 )
4269 while token is not None:
4270 token, _, _ = self.rewrite(
4271 self,
4272 token=token,
4273 if_generation_match=if_generation_match,
4274 if_generation_not_match=if_generation_not_match,
4275 if_metageneration_match=if_metageneration_match,
4276 if_metageneration_not_match=if_metageneration_not_match,
4277 if_source_generation_match=if_source_generation_match,
4278 if_source_generation_not_match=if_source_generation_not_match,
4279 if_source_metageneration_match=if_source_metageneration_match,
4280 if_source_metageneration_not_match=if_source_metageneration_not_match,
4281 timeout=timeout,
4282 retry=retry,
4283 )
4284
4285 def open(
4286 self,
4287 mode="r",
4288 chunk_size=None,
4289 ignore_flush=None,
4290 encoding=None,
4291 errors=None,
4292 newline=None,
4293 **kwargs,
4294 ):
4295 r"""Create a file handler for file-like I/O to or from this blob.
4296
4297 This method can be used as a context manager, just like Python's
4298 built-in 'open()' function.
4299
4300 While reading, as with other read methods, if blob.generation is not set
4301 the most recent blob generation will be used. Because the file-like IO
4302 reader downloads progressively in chunks, this could result in data from
4303 multiple versions being mixed together. If this is a concern, use
4304 either bucket.get_blob(), or blob.reload(), which will download the
4305 latest generation number and set it; or, if the generation is known, set
4306 it manually, for instance with bucket.blob(generation=123456).
4307
4308 Checksumming (hashing) to verify data integrity is disabled for reads
4309 using this feature because reads are implemented using request ranges,
4310 which do not provide checksums to validate. See
4311 https://cloud.google.com/storage/docs/hashes-etags for details.
4312
4313 See a [code sample](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_fileio_write_read.py).
4314
4315 Keyword arguments to pass to the underlying API calls.
4316 For both uploads and downloads, the following arguments are
4317 supported:
4318
4319 - ``if_generation_match``
4320 - ``if_generation_not_match``
4321 - ``if_metageneration_match``
4322 - ``if_metageneration_not_match``
4323 - ``timeout``
4324 - ``retry``
4325
4326 For downloads only, the following additional arguments are supported:
4327
4328 - ``raw_download``
4329 - ``single_shot_download``
4330
4331 For uploads only, the following additional arguments are supported:
4332
4333 - ``content_type``
4334 - ``predefined_acl``
4335 - ``checksum``
4336
4337 :type mode: str
4338 :param mode:
4339 (Optional) A mode string, as per standard Python `open()` semantics.The first
4340 character must be 'r', to open the blob for reading, or 'w' to open
4341 it for writing. The second character, if present, must be 't' for
4342 (unicode) text mode, or 'b' for bytes mode. If the second character
4343 is omitted, text mode is the default.
4344
4345 :type chunk_size: long
4346 :param chunk_size:
4347 (Optional) For reads, the minimum number of bytes to read at a time.
4348 If fewer bytes than the chunk_size are requested, the remainder is
4349 buffered. For writes, the maximum number of bytes to buffer before
4350 sending data to the server, and the size of each request when data
4351 is sent. Writes are implemented as a "resumable upload", so
4352 chunk_size for writes must be exactly a multiple of 256KiB as with
4353 other resumable uploads. The default is 40 MiB.
4354
4355 :type ignore_flush: bool
4356 :param ignore_flush:
4357 (Optional) For non text-mode writes, makes flush() do nothing
4358 instead of raising an error. flush() without closing is not
4359 supported by the remote service and therefore calling it normally
4360 results in io.UnsupportedOperation. However, that behavior is
4361 incompatible with some consumers and wrappers of file objects in
4362 Python, such as zipfile.ZipFile or io.TextIOWrapper. Setting
4363 ignore_flush will cause flush() to successfully do nothing, for
4364 compatibility with those contexts. The correct way to actually flush
4365 data to the remote server is to close() (using a context manager,
4366 such as in the example, will cause this to happen automatically).
4367
4368 :type encoding: str
4369 :param encoding:
4370 (Optional) For text mode only, the name of the encoding that the stream will
4371 be decoded or encoded with. If omitted, it defaults to
4372 locale.getpreferredencoding(False).
4373
4374 :type errors: str
4375 :param errors:
4376 (Optional) For text mode only, an optional string that specifies how encoding
4377 and decoding errors are to be handled. Pass 'strict' to raise a
4378 ValueError exception if there is an encoding error (the default of
4379 None has the same effect), or pass 'ignore' to ignore errors. (Note
4380 that ignoring encoding errors can lead to data loss.) Other more
4381 rarely-used options are also available; see the Python 'io' module
4382 documentation for 'io.TextIOWrapper' for a complete list.
4383
4384 :type newline: str
4385 :param newline:
4386 (Optional) For text mode only, controls how line endings are handled. It can
4387 be None, '', '\n', '\r', and '\r\n'. If None, reads use "universal
4388 newline mode" and writes use the system default. See the Python
4389 'io' module documentation for 'io.TextIOWrapper' for details.
4390
4391 :returns: A 'BlobReader' or 'BlobWriter' from
4392 'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one
4393 of those classes, depending on the 'mode' argument.
4394 """
4395 with create_trace_span(name="Storage.Blob.open"):
4396 if mode == "rb":
4397 if encoding or errors or newline:
4398 raise ValueError(
4399 "encoding, errors and newline arguments are for text mode only"
4400 )
4401 if ignore_flush:
4402 raise ValueError(
4403 "ignore_flush argument is for non-text write mode only"
4404 )
4405 return BlobReader(self, chunk_size=chunk_size, **kwargs)
4406 elif mode == "wb":
4407 if encoding or errors or newline:
4408 raise ValueError(
4409 "encoding, errors and newline arguments are for text mode only"
4410 )
4411 return BlobWriter(
4412 self,
4413 chunk_size=chunk_size,
4414 ignore_flush=ignore_flush,
4415 **kwargs,
4416 )
4417 elif mode in ("r", "rt"):
4418 if ignore_flush:
4419 raise ValueError(
4420 "ignore_flush argument is for non-text write mode only"
4421 )
4422 return TextIOWrapper(
4423 BlobReader(self, chunk_size=chunk_size, **kwargs),
4424 encoding=encoding,
4425 errors=errors,
4426 newline=newline,
4427 )
4428 elif mode in ("w", "wt"):
4429 if ignore_flush is False:
4430 raise ValueError(
4431 "ignore_flush is required for text mode writing and "
4432 "cannot be set to False"
4433 )
4434 return TextIOWrapper(
4435 BlobWriter(
4436 self, chunk_size=chunk_size, ignore_flush=True, **kwargs
4437 ),
4438 encoding=encoding,
4439 errors=errors,
4440 newline=newline,
4441 )
4442 else:
4443 raise NotImplementedError(
4444 "Supported modes strings are 'r', 'rb', 'rt', 'w', 'wb', and 'wt' only."
4445 )
4446
    cache_control = _scalar_property("cacheControl")
    """HTTP 'Cache-Control' header for this object.

    See [`RFC 7234`](https://tools.ietf.org/html/rfc7234#section-5.2)
    and [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``

    """

    content_disposition = _scalar_property("contentDisposition")
    """HTTP 'Content-Disposition' header for this object.

    See [`RFC 6266`](https://tools.ietf.org/html/rfc6266) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    """

    content_encoding = _scalar_property("contentEncoding")
    """HTTP 'Content-Encoding' header for this object.

    See [`RFC 7231`](https://tools.ietf.org/html/rfc7231#section-3.1.2.2) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    """

    content_language = _scalar_property("contentLanguage")
    """HTTP 'Content-Language' header for this object.

    See [`BCP47`](https://tools.ietf.org/html/bcp47) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    """

    content_type = _scalar_property(_CONTENT_TYPE_FIELD)
    """HTTP 'Content-Type' header for this object.

    See [`RFC 2616`](https://tools.ietf.org/html/rfc2616#section-14.17) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    """

    crc32c = _scalar_property("crc32c")
    """CRC32C checksum for this object.

    This returns the blob's CRC32C checksum. To retrieve the value, first use a
    reload method of the Blob class which loads the blob's properties from the server.

    See [`RFC 4960`](https://tools.ietf.org/html/rfc4960#appendix-B) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    If not set before upload, the server will compute the hash.

    :rtype: str or ``NoneType``
    """
4506
    def _prep_and_do_download(
        self,
        file_obj,
        client=None,
        start=None,
        end=None,
        raw_download=False,
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        checksum="auto",
        retry=DEFAULT_RETRY,
        single_shot_download=False,
        command=None,
    ):
        """Download the contents of a blob object into a file-like object.

        See https://cloud.google.com/storage/docs/downloading-objects

        If :attr:`user_project` is set on the bucket, bills the API request
        to that project.

        :type file_obj: file
        :param file_obj: A file handle to which to write the blob's data.

        :type client: :class:`~google.cloud.storage.client.Client`
        :param client:
            (Optional) The client to use. If not passed, falls back to the
            ``client`` stored on the blob's bucket.

        :type start: int
        :param start: (Optional) The first byte in a range to be downloaded.

        :type end: int
        :param end: (Optional) The last byte in a range to be downloaded.

        :type raw_download: bool
        :param raw_download:
            (Optional) If true, download the object without any expansion.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match:
            (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match:
            (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type checksum: str
        :param checksum:
            (Optional) The type of checksum to compute to verify the integrity
            of the object. The response headers must contain a checksum of the
            requested type. If the headers lack an appropriate checksum (for
            instance in the case of transcoded or ranged downloads where the
            remote service does not know the correct checksum, including
            downloads where chunk_size is set) an INFO-level log will be
            emitted. Supported values are "md5", "crc32c", "auto" and None. The
            default is "auto", which will try to detect if the C extension for
            crc32c is installed and fall back to md5 otherwise.

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry: (Optional) How to retry the RPC. A None value will disable
            retries. A google.api_core.retry.Retry value will enable retries,
            and the object will define retriable response codes and errors and
            configure backoff and timeout options.

            A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
            Retry object and activates it only if certain conditions are met.
            This class exists to provide safe defaults for RPC calls that are
            not technically safe to retry normally (due to potential data
            duplication or other side-effects) but become safe to retry if a
            condition such as if_metageneration_match is set.

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

        :type single_shot_download: bool
        :param single_shot_download:
            (Optional) If true, download the object in a single request.
            Caution: Enabling this will increase the memory overload for your application.
            Please enable this as per your use case.

        :type command: str
        :param command:
            (Optional) Information about which interface for download was used,
            to be included in the X-Goog-API-Client header. Please leave as None
            unless otherwise directed.
        """
        # Handle ConditionalRetryPolicy.
        if isinstance(retry, ConditionalRetryPolicy):
            # Conditional retries are designed for non-media calls, which change
            # arguments into query_params dictionaries. Media operations work
            # differently, so here we make a "fake" query_params to feed to the
            # ConditionalRetryPolicy.
            query_params = {
                "ifGenerationMatch": if_generation_match,
                "ifMetagenerationMatch": if_metageneration_match,
            }
            retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)

        client = self._require_client(client)

        download_url = self._get_download_url(
            client,
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )
        # Customer-supplied encryption key headers (empty dict when no key set).
        headers = _get_encryption_headers(self._encryption_key)
        # Advertise gzip so the server may send a transport-encoded response.
        headers["accept-encoding"] = "gzip"
        _add_etag_match_headers(
            headers,
            if_etag_match=if_etag_match,
            if_etag_not_match=if_etag_not_match,
        )
        # Add any client attached custom headers to be sent with the request.
        # Later entries win on key collisions, so client._extra_headers takes
        # precedence over both the defaults and the headers built above.
        headers = {
            **_get_default_headers(client._connection.user_agent, command=command),
            **headers,
            **client._extra_headers,
        }

        transport = client._http

        try:
            self._do_download(
                transport,
                file_obj,
                download_url,
                headers,
                start,
                end,
                raw_download,
                timeout=timeout,
                checksum=checksum,
                retry=retry,
                single_shot_download=single_shot_download,
            )
        except InvalidResponse as exc:
            # Translate the media-library error into the corresponding
            # google.cloud exception for the HTTP status code.
            _raise_from_invalid_response(exc)
4674
4675 @property
4676 def component_count(self):
4677 """Number of underlying components that make up this object.
4678
4679 See https://cloud.google.com/storage/docs/json_api/v1/objects
4680
4681 :rtype: int or ``NoneType``
4682 :returns: The component count (in case of a composed object) or
4683 ``None`` if the blob's resource has not been loaded from
4684 the server. This property will not be set on objects
4685 not created via ``compose``.
4686 """
4687 component_count = self._properties.get("componentCount")
4688 if component_count is not None:
4689 return int(component_count)
4690
4691 @property
4692 def etag(self):
4693 """Retrieve the ETag for the object.
4694
4695 See [`RFC 2616 (etags)`](https://tools.ietf.org/html/rfc2616#section-3.11) and
4696 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4697
4698 :rtype: str or ``NoneType``
4699 :returns: The blob etag or ``None`` if the blob's resource has not
4700 been loaded from the server.
4701 """
4702 return self._properties.get("etag")
4703
    event_based_hold = _scalar_property("eventBasedHold")
    """Is an event-based hold active on the object?

    See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    If the property is not set locally, returns :data:`None`.

    See also :attr:`temporary_hold`.

    :rtype: bool or ``NoneType``
    """
4713
4714 @property
4715 def generation(self):
4716 """Retrieve the generation for the object.
4717
4718 See https://cloud.google.com/storage/docs/json_api/v1/objects
4719
4720 :rtype: int or ``NoneType``
4721 :returns: The generation of the blob or ``None`` if the blob's
4722 resource has not been loaded from the server.
4723 """
4724 generation = self._properties.get("generation")
4725 if generation is not None:
4726 return int(generation)
4727
4728 @property
4729 def id(self):
4730 """Retrieve the ID for the object.
4731
4732 See https://cloud.google.com/storage/docs/json_api/v1/objects
4733
4734 The ID consists of the bucket name, object name, and generation number.
4735
4736 :rtype: str or ``NoneType``
4737 :returns: The ID of the blob or ``None`` if the blob's
4738 resource has not been loaded from the server.
4739 """
4740 return self._properties.get("id")
4741
    md5_hash = _scalar_property("md5Hash")
    """MD5 hash for this object.

    This returns the blob's MD5 hash. To retrieve the value, first use a
    reload method of the Blob class which loads the blob's properties from the server.

    See [`RFC 1321`](https://tools.ietf.org/html/rfc1321) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    If not set before upload, the server will compute the hash.

    See also :attr:`crc32c`.

    :rtype: str or ``NoneType``
    """
4755
4756 @property
4757 def media_link(self):
4758 """Retrieve the media download URI for the object.
4759
4760 See https://cloud.google.com/storage/docs/json_api/v1/objects
4761
4762 :rtype: str or ``NoneType``
4763 :returns: The media link for the blob or ``None`` if the blob's
4764 resource has not been loaded from the server.
4765 """
4766 return self._properties.get("mediaLink")
4767
4768 @property
4769 def metadata(self):
4770 """Retrieve arbitrary/application specific metadata for the object.
4771
4772 See https://cloud.google.com/storage/docs/json_api/v1/objects
4773
4774 :setter: Update arbitrary/application specific metadata for the
4775 object.
4776 :getter: Retrieve arbitrary/application specific metadata for
4777 the object.
4778
4779 :rtype: dict or ``NoneType``
4780 :returns: The metadata associated with the blob or ``None`` if the
4781 property is not set.
4782 """
4783 return copy.deepcopy(self._properties.get("metadata"))
4784
4785 @metadata.setter
4786 def metadata(self, value):
4787 """Update arbitrary/application specific metadata for the object.
4788
4789 Values are stored to GCS as strings. To delete a key, set its value to
4790 None and call blob.patch().
4791
4792 See https://cloud.google.com/storage/docs/json_api/v1/objects
4793
4794 :type value: dict
4795 :param value: The blob metadata to set.
4796 """
4797 if value is not None:
4798 value = {k: str(v) if v is not None else None for k, v in value.items()}
4799 self._patch_property("metadata", value)
4800
4801 @property
4802 def metageneration(self):
4803 """Retrieve the metageneration for the object.
4804
4805 See https://cloud.google.com/storage/docs/json_api/v1/objects
4806
4807 :rtype: int or ``NoneType``
4808 :returns: The metageneration of the blob or ``None`` if the blob's
4809 resource has not been loaded from the server.
4810 """
4811 metageneration = self._properties.get("metageneration")
4812 if metageneration is not None:
4813 return int(metageneration)
4814
4815 @property
4816 def owner(self):
4817 """Retrieve info about the owner of the object.
4818
4819 See https://cloud.google.com/storage/docs/json_api/v1/objects
4820
4821 :rtype: dict or ``NoneType``
4822 :returns: Mapping of owner's role/ID, or ``None`` if the blob's
4823 resource has not been loaded from the server.
4824 """
4825 return copy.deepcopy(self._properties.get("owner"))
4826
4827 @property
4828 def retention_expiration_time(self):
4829 """Retrieve timestamp at which the object's retention period expires.
4830
4831 See https://cloud.google.com/storage/docs/json_api/v1/objects
4832
4833 :rtype: :class:`datetime.datetime` or ``NoneType``
4834 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4835 ``None`` if the property is not set locally.
4836 """
4837 value = self._properties.get("retentionExpirationTime")
4838 if value is not None:
4839 return _rfc3339_nanos_to_datetime(value)
4840
4841 @property
4842 def self_link(self):
4843 """Retrieve the URI for the object.
4844
4845 See https://cloud.google.com/storage/docs/json_api/v1/objects
4846
4847 :rtype: str or ``NoneType``
4848 :returns: The self link for the blob or ``None`` if the blob's
4849 resource has not been loaded from the server.
4850 """
4851 return self._properties.get("selfLink")
4852
4853 @property
4854 def size(self):
4855 """Size of the object, in bytes.
4856
4857 See https://cloud.google.com/storage/docs/json_api/v1/objects
4858
4859 :rtype: int or ``NoneType``
4860 :returns: The size of the blob or ``None`` if the blob's
4861 resource has not been loaded from the server.
4862 """
4863 size = self._properties.get("size")
4864 if size is not None:
4865 return int(size)
4866
4867 @property
4868 def kms_key_name(self):
4869 """Resource name of Cloud KMS key used to encrypt the blob's contents.
4870
4871 :rtype: str or ``NoneType``
4872 :returns:
4873 The resource name or ``None`` if no Cloud KMS key was used,
4874 or the blob's resource has not been loaded from the server.
4875 """
4876 return self._properties.get("kmsKeyName")
4877
4878 @kms_key_name.setter
4879 def kms_key_name(self, value):
4880 """Set KMS encryption key for object.
4881
4882 :type value: str or ``NoneType``
4883 :param value: new KMS key name (None to clear any existing key).
4884 """
4885 self._patch_property("kmsKeyName", value)
4886
    # NOTE: created via _scalar_property like the other scalar fields on this
    # class; per the docstring below, the value is only honored at creation
    # time -- use update_storage_class() for existing objects.
    storage_class = _scalar_property("storageClass")
    """Retrieve the storage class for the object.

    This can only be set at blob / object **creation** time. If you'd
    like to change the storage class **after** the blob / object already
    exists in a bucket, call :meth:`update_storage_class` (which uses
    :meth:`rewrite`).

    See https://cloud.google.com/storage/docs/storage-classes

    :rtype: str or ``NoneType``
    :returns:
        If set, one of
        :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.DURABLE_REDUCED_AVAILABILITY_STORAGE_CLASS`,
        else ``None``.
    """
4909
    temporary_hold = _scalar_property("temporaryHold")
    """Is a temporary hold active on the object?

    See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    If the property is not set locally, returns :data:`None`.

    See also :attr:`event_based_hold`.

    :rtype: bool or ``NoneType``
    """
4919
4920 @property
4921 def time_deleted(self):
4922 """Retrieve the timestamp at which the object was deleted.
4923
4924 See https://cloud.google.com/storage/docs/json_api/v1/objects
4925
4926 :rtype: :class:`datetime.datetime` or ``NoneType``
4927 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4928 ``None`` if the blob's resource has not been loaded from
4929 the server (see :meth:`reload`). If the blob has
4930 not been deleted, this will never be set.
4931 """
4932 value = self._properties.get("timeDeleted")
4933 if value is not None:
4934 return _rfc3339_nanos_to_datetime(value)
4935
4936 @property
4937 def time_created(self):
4938 """Retrieve the timestamp at which the object was created.
4939
4940 See https://cloud.google.com/storage/docs/json_api/v1/objects
4941
4942 :rtype: :class:`datetime.datetime` or ``NoneType``
4943 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4944 ``None`` if the blob's resource has not been loaded from
4945 the server (see :meth:`reload`).
4946 """
4947 value = self._properties.get("timeCreated")
4948 if value is not None:
4949 return _rfc3339_nanos_to_datetime(value)
4950
4951 @property
4952 def updated(self):
4953 """Retrieve the timestamp at which the object was updated.
4954
4955 See https://cloud.google.com/storage/docs/json_api/v1/objects
4956
4957 :rtype: :class:`datetime.datetime` or ``NoneType``
4958 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4959 ``None`` if the blob's resource has not been loaded from
4960 the server (see :meth:`reload`).
4961 """
4962 value = self._properties.get("updated")
4963 if value is not None:
4964 return _rfc3339_nanos_to_datetime(value)
4965
4966 @property
4967 def custom_time(self):
4968 """Retrieve the custom time for the object.
4969
4970 See https://cloud.google.com/storage/docs/json_api/v1/objects
4971
4972 :rtype: :class:`datetime.datetime` or ``NoneType``
4973 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4974 ``None`` if the blob's resource has not been loaded from
4975 the server (see :meth:`reload`).
4976 """
4977 value = self._properties.get("customTime")
4978 if value is not None:
4979 return _rfc3339_nanos_to_datetime(value)
4980
4981 @custom_time.setter
4982 def custom_time(self, value):
4983 """Set the custom time for the object.
4984
4985 Once set on the server side object, this value can't be unset, but may
4986 only changed to a custom datetime in the future.
4987
4988 If :attr:`custom_time` must be unset, either perform a rewrite
4989 operation or upload the data again.
4990
4991 See https://cloud.google.com/storage/docs/json_api/v1/objects
4992
4993 :type value: :class:`datetime.datetime`
4994 :param value: new value
4995 """
4996 if value is not None:
4997 value = _datetime_to_rfc3339(value)
4998
4999 self._patch_property("customTime", value)
5000
5001 @property
5002 def retention(self):
5003 """Retrieve the retention configuration for this object.
5004
5005 :rtype: :class:`Retention`
5006 :returns: an instance for managing the object's retention configuration.
5007 """
5008 info = self._properties.get("retention", {})
5009 return Retention.from_api_repr(info, self)
5010
5011 @property
5012 def soft_delete_time(self):
5013 """If this object has been soft-deleted, returns the time at which it became soft-deleted.
5014
5015 :rtype: :class:`datetime.datetime` or ``NoneType``
5016 :returns:
5017 (readonly) The time that the object became soft-deleted.
5018 Note this property is only set for soft-deleted objects.
5019 """
5020 soft_delete_time = self._properties.get("softDeleteTime")
5021 if soft_delete_time is not None:
5022 return _rfc3339_nanos_to_datetime(soft_delete_time)
5023
5024 @property
5025 def hard_delete_time(self):
5026 """If this object has been soft-deleted, returns the time at which it will be permanently deleted.
5027
5028 :rtype: :class:`datetime.datetime` or ``NoneType``
5029 :returns:
5030 (readonly) The time that the object will be permanently deleted.
5031 Note this property is only set for soft-deleted objects.
5032 """
5033 hard_delete_time = self._properties.get("hardDeleteTime")
5034 if hard_delete_time is not None:
5035 return _rfc3339_nanos_to_datetime(hard_delete_time)
5036
5037
5038def _get_host_name(connection):
5039 """Returns the host name from the given connection.
5040
5041 :type connection: :class:`~google.cloud.storage._http.Connection`
5042 :param connection: The connection object.
5043
5044 :rtype: str
5045 :returns: The host name.
5046 """
5047 # TODO: After google-cloud-core 1.6.0 is stable and we upgrade it
5048 # to 1.6.0 in setup.py, we no longer need to check the attribute
5049 # existence. We can simply return connection.get_api_base_url_for_mtls().
5050 return (
5051 connection.API_BASE_URL
5052 if not hasattr(connection, "get_api_base_url_for_mtls")
5053 else connection.get_api_base_url_for_mtls()
5054 )
5055
5056
def _get_encryption_headers(key, source=False):
    """Build customer-supplied encryption key headers.

    :type key: bytes
    :param key: 32 byte key to build request key and hash.

    :type source: bool
    :param source: If true, return headers for the "source" blob; otherwise,
                   return headers for the "destination" blob.

    :rtype: dict
    :returns: dict of HTTP headers being sent in request.
    """
    if key is None:
        return {}

    raw_key = _to_bytes(key)
    # Both the key and its SHA-256 digest travel base64-encoded.
    digest_b64 = base64.b64encode(hashlib.sha256(raw_key).digest())
    key_b64 = base64.b64encode(raw_key)

    prefix = "X-Goog-Copy-Source-Encryption-" if source else "X-Goog-Encryption-"

    return {
        prefix + "Algorithm": "AES256",
        prefix + "Key": _bytes_to_unicode(key_b64),
        prefix + "Key-Sha256": _bytes_to_unicode(digest_b64),
    }
5088
5089
def _quote(value, safe=b"~"):
    """URL-quote a string.

    If the value is unicode, this method first UTF-8 encodes it as bytes and
    then quotes the bytes. (In Python 3, ``urllib.parse.quote`` does this
    encoding automatically, but in Python 2, non-ASCII characters cannot be
    quoted.)

    :type value: str or bytes
    :param value: The value to be URL-quoted.

    :type safe: bytes
    :param safe: Bytes *not* to be quoted.  By default, includes only ``b'~'``.

    :rtype: str
    :returns: The encoded value (bytes in Python 2, unicode in Python 3).
    """
    return quote(_to_bytes(value, encoding="utf-8"), safe=safe)
5109
5110
5111def _maybe_rewind(stream, rewind=False):
5112 """Rewind the stream if desired.
5113
5114 :type stream: IO[bytes]
5115 :param stream: A bytes IO object open for reading.
5116
5117 :type rewind: bool
5118 :param rewind: Indicates if we should seek to the beginning of the stream.
5119 """
5120 if rewind:
5121 stream.seek(0, os.SEEK_SET)
5122
5123
def _raise_from_invalid_response(error):
    """Re-wrap and raise an ``InvalidResponse`` exception.

    :type error: :exc:`google.cloud.storage.exceptions.InvalidResponse`
    :param error: A caught exception from the ``google-resumable-media``
                  library.

    :raises: :class:`~google.cloud.exceptions.GoogleCloudError` corresponding
             to the failed status code
    """
    response = error.response

    # 'response.text' carries the backend's actual error reason, while
    # 'error' only describes the expected-status mismatch; include both
    # when the body is non-empty.
    reason = str(error)
    if response.text:
        reason = response.text + ": " + reason

    message = f"{response.request.method} {response.request.url}: {reason}"

    raise exceptions.from_http_status(response.status_code, message, response=response)
5146
5147
5148def _add_query_parameters(base_url, name_value_pairs):
5149 """Add one query parameter to a base URL.
5150
5151 :type base_url: string
5152 :param base_url: Base URL (may already contain query parameters)
5153
5154 :type name_value_pairs: list of (string, string) tuples.
5155 :param name_value_pairs: Names and values of the query parameters to add
5156
5157 :rtype: string
5158 :returns: URL with additional query strings appended.
5159 """
5160 if len(name_value_pairs) == 0:
5161 return base_url
5162
5163 scheme, netloc, path, query, frag = urlsplit(base_url)
5164 query = parse_qsl(query)
5165 query.extend(name_value_pairs)
5166 return urlunsplit((scheme, netloc, path, urlencode(query), frag))
5167
5168
class Retention(dict):
    """Map an object's retention configuration.

    The mapping itself is the JSON resource; the properties below expose the
    individual fields with datetime conversion.

    :type blob: :class:`Blob`
    :params blob: blob for which this retention configuration applies to.

    :type mode: str or ``NoneType``
    :params mode:
        (Optional) The mode of the retention configuration, which can be
        either Unlocked or Locked.
        See: https://cloud.google.com/storage/docs/object-lock

    :type retain_until_time: :class:`datetime.datetime` or ``NoneType``
    :params retain_until_time:
        (Optional) The earliest time that the object can be deleted or
        replaced, which is the retention configuration set for this object.

    :type retention_expiration_time: :class:`datetime.datetime` or ``NoneType``
    :params retention_expiration_time:
        (Optional) The earliest time that the object can be deleted, which
        depends on any retention configuration set for the object and any
        retention policy set for the bucket that contains the object. This
        value should normally only be set by the back-end API.
    """

    def __init__(
        self,
        blob,
        mode=None,
        retain_until_time=None,
        retention_expiration_time=None,
    ):
        payload = {"mode": mode}
        # Datetimes are serialized to RFC 3339 strings; unset fields are
        # simply omitted from the resource.
        if retain_until_time is not None:
            payload["retainUntilTime"] = _datetime_to_rfc3339(retain_until_time)
        if retention_expiration_time is not None:
            payload["retentionExpirationTime"] = _datetime_to_rfc3339(
                retention_expiration_time
            )
        super().__init__(payload)
        self._blob = blob

    @classmethod
    def from_api_repr(cls, resource, blob):
        """Factory: construct instance from resource.

        :type blob: :class:`Blob`
        :params blob: Blob for which this retention configuration applies to.

        :type resource: dict
        :param resource: mapping as returned from API call.

        :rtype: :class:`Retention`
        :returns: Retention configuration created from resource.
        """
        config = cls(blob)
        config.update(resource)
        return config

    @property
    def blob(self):
        """Blob for which this retention configuration applies to.

        :rtype: :class:`Blob`
        :returns: the instance's blob.
        """
        return self._blob

    @property
    def mode(self):
        """The mode of the retention configuration: 'Unlocked' or 'Locked'.

        :rtype: string
        :returns: The configured mode, or ``None`` if unset.
        """
        return self.get("mode")

    @mode.setter
    def mode(self, value):
        self["mode"] = value
        # Stage the whole retention resource for the next patch().
        self._blob._patch_property("retention", self)

    @property
    def retain_until_time(self):
        """The earliest time that the object can be deleted or replaced, which
        is the retention configuration set for this object.

        :rtype: :class:`datetime.datetime` or ``NoneType``
        :returns: Datetime object parsed from RFC3339 valid timestamp, or
                  ``None`` if the blob's resource has not been loaded from
                  the server (see :meth:`reload`).
        """
        raw = self.get("retainUntilTime")
        if raw is None:
            return None
        return _rfc3339_nanos_to_datetime(raw)

    @retain_until_time.setter
    def retain_until_time(self, value):
        """Set the retain_until_time for the object retention configuration.

        :type value: :class:`datetime.datetime`
        :param value: The earliest time that the object can be deleted or
                      replaced.
        """
        self["retainUntilTime"] = None if value is None else _datetime_to_rfc3339(value)
        self._blob._patch_property("retention", self)

    @property
    def retention_expiration_time(self):
        """The earliest time that the object can be deleted, which depends on
        any retention configuration set for the object and any retention
        policy set for the bucket that contains the object.

        :rtype: :class:`datetime.datetime` or ``NoneType``
        :returns:
            (readonly) The earliest time that the object can be deleted.
        """
        raw = self.get("retentionExpirationTime")
        if raw is None:
            return None
        return _rfc3339_nanos_to_datetime(raw)