1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15# pylint: disable=too-many-lines
16
17"""Create / interact with Google Cloud Storage blobs."""
18
19import base64
20import copy
21import hashlib
22from io import BytesIO
23from io import TextIOWrapper
24import logging
25import mimetypes
26import os
27import re
28from email.parser import HeaderParser
29from urllib.parse import parse_qsl
30from urllib.parse import quote
31from urllib.parse import urlencode
32from urllib.parse import urlsplit
33from urllib.parse import urlunsplit
34import warnings
35
36from google.cloud.storage._media.requests import ChunkedDownload
37from google.cloud.storage._media.requests import Download
38from google.cloud.storage._media.requests import RawDownload
39from google.cloud.storage._media.requests import RawChunkedDownload
40from google.cloud.storage._media.requests import MultipartUpload
41from google.cloud.storage._media.requests import ResumableUpload
42
43from google.api_core.iam import Policy
44from google.cloud import exceptions
45from google.cloud._helpers import _bytes_to_unicode
46from google.cloud._helpers import _datetime_to_rfc3339
47from google.cloud._helpers import _rfc3339_nanos_to_datetime
48from google.cloud._helpers import _to_bytes
49from google.cloud.exceptions import NotFound
50from google.cloud.storage._helpers import _add_etag_match_headers
51from google.cloud.storage._helpers import _add_generation_match_parameters
52from google.cloud.storage._helpers import _PropertyMixin
53from google.cloud.storage._helpers import _scalar_property
54from google.cloud.storage._helpers import _bucket_bound_hostname_url
55from google.cloud.storage._helpers import _raise_if_more_than_one_set
56from google.cloud.storage._helpers import _get_default_headers
57from google.cloud.storage._helpers import _get_default_storage_base_url
58from google.cloud.storage._signing import generate_signed_url_v2
59from google.cloud.storage._signing import generate_signed_url_v4
60from google.cloud.storage._helpers import _API_VERSION
61from google.cloud.storage._helpers import _virtual_hosted_style_base_url
62from google.cloud.storage._opentelemetry_tracing import create_trace_span
63from google.cloud.storage.acl import ACL
64from google.cloud.storage.acl import ObjectACL
65from google.cloud.storage.constants import _DEFAULT_TIMEOUT
66from google.cloud.storage.constants import ARCHIVE_STORAGE_CLASS
67from google.cloud.storage.constants import COLDLINE_STORAGE_CLASS
68from google.cloud.storage.constants import MULTI_REGIONAL_LEGACY_STORAGE_CLASS
69from google.cloud.storage.constants import NEARLINE_STORAGE_CLASS
70from google.cloud.storage.constants import REGIONAL_LEGACY_STORAGE_CLASS
71from google.cloud.storage.constants import STANDARD_STORAGE_CLASS
72from google.cloud.storage.exceptions import DataCorruption
73from google.cloud.storage.exceptions import InvalidResponse
74from google.cloud.storage.retry import ConditionalRetryPolicy
75from google.cloud.storage.retry import DEFAULT_RETRY
76from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON
77from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED
78from google.cloud.storage.fileio import BlobReader
79from google.cloud.storage.fileio import BlobWriter
80
81
82_DEFAULT_CONTENT_TYPE = "application/octet-stream"
83_DOWNLOAD_URL_TEMPLATE = "{hostname}/download/storage/{api_version}{path}?alt=media"
84_BASE_UPLOAD_TEMPLATE = (
85 "{hostname}/upload/storage/{api_version}{bucket_path}/o?uploadType="
86)
87_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
88_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"
89# NOTE: "acl" is also writeable but we defer ACL management to
90# the classes in the google.cloud.storage.acl module.
91_CONTENT_TYPE_FIELD = "contentType"
92_WRITABLE_FIELDS = (
93 "cacheControl",
94 "contentDisposition",
95 "contentEncoding",
96 "contentLanguage",
97 _CONTENT_TYPE_FIELD,
98 "crc32c",
99 "customTime",
100 "md5Hash",
101 "metadata",
102 "name",
103 "retention",
104 "storageClass",
105)
106_READ_LESS_THAN_SIZE = (
107 "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining."
108)
109_CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE = (
110 "A checksum of type `{}` was requested, but checksumming is not available "
111 "for downloads when chunk_size is set."
112)
113_COMPOSE_IF_GENERATION_LIST_DEPRECATED = (
114 "'if_generation_match: type list' is deprecated and supported for "
115 "backwards-compatability reasons only. Use 'if_source_generation_match' "
116 "instead' to match source objects' generations."
117)
118_COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR = (
119 "Use 'if_generation_match' to match the generation of the destination "
120 "object by passing in a generation number, instead of a list. "
121 "Use 'if_source_generation_match' to match source objects generations."
122)
123_COMPOSE_IF_METAGENERATION_LIST_DEPRECATED = (
124 "'if_metageneration_match: type list' is deprecated and supported for "
125 "backwards-compatability reasons only. Note that the metageneration to "
126 "be matched is that of the destination blob. Please pass in a single "
127 "value (type long)."
128)
129_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR = (
130 "'if_source_generation_match' length must be the same as 'sources' length"
131)
132_DOWNLOAD_AS_STRING_DEPRECATED = (
133 "Blob.download_as_string() is deprecated and will be removed in future. "
134 "Use Blob.download_as_bytes() instead."
135)
136_FROM_STRING_DEPRECATED = (
137 "Blob.from_string() is deprecated and will be removed in future. "
138 "Use Blob.from_uri() instead."
139)
140_GS_URL_REGEX_PATTERN = re.compile(
141 r"(?P<scheme>gs)://(?P<bucket_name>[a-z0-9_.-]+)/(?P<object_name>.+)"
142)
143
144_DEFAULT_CHUNKSIZE = 104857600 # 1024 * 1024 B * 100 = 100 MiB
145_MAX_MULTIPART_SIZE = 8388608 # 8 MiB
146
147_logger = logging.getLogger(__name__)
148
149
150class Blob(_PropertyMixin):
151 """A wrapper around Cloud Storage's concept of an ``Object``.
152
153 :type name: str
154 :param name: The name of the blob. This corresponds to the unique path of
155 the object in the bucket. If bytes, will be converted to a
156 unicode object. Blob / object names can contain any sequence
157 of valid unicode characters, of length 1-1024 bytes when
158 UTF-8 encoded.
159
160 :type bucket: :class:`google.cloud.storage.bucket.Bucket`
161 :param bucket: The bucket to which this blob belongs.
162
163 :type chunk_size: int
164 :param chunk_size:
165 (Optional) The size of a chunk of data whenever iterating (in bytes).
166 This must be a multiple of 256 KB per the API specification. If not
167 specified, the chunk_size of the blob itself is used. If that is not
168 specified, a default value of 40 MB is used.
169
170 :type encryption_key: bytes
171 :param encryption_key:
172 (Optional) 32 byte encryption key for customer-supplied encryption.
173 See https://cloud.google.com/storage/docs/encryption#customer-supplied.
174
175 :type kms_key_name: str
176 :param kms_key_name:
177 (Optional) Resource name of Cloud KMS key used to encrypt the blob's
178 contents.
179
180 :type generation: long
181 :param generation:
182 (Optional) If present, selects a specific revision of this object.
183 """
184
185 _chunk_size = None # Default value for each instance.
186 _CHUNK_SIZE_MULTIPLE = 256 * 1024
187 """Number (256 KB, in bytes) that must divide the chunk size."""
188
189 STORAGE_CLASSES = (
190 STANDARD_STORAGE_CLASS,
191 NEARLINE_STORAGE_CLASS,
192 COLDLINE_STORAGE_CLASS,
193 ARCHIVE_STORAGE_CLASS,
194 MULTI_REGIONAL_LEGACY_STORAGE_CLASS,
195 REGIONAL_LEGACY_STORAGE_CLASS,
196 )
197 """Allowed values for :attr:`storage_class`.
198
199 See
200 https://cloud.google.com/storage/docs/json_api/v1/objects#storageClass
201 https://cloud.google.com/storage/docs/per-object-storage-class
202
203 .. note::
204 This list does not include 'DURABLE_REDUCED_AVAILABILITY', which
205 is only documented for buckets (and deprecated).
206 """
207
    def __init__(
        self,
        name,
        bucket,
        chunk_size=None,
        encryption_key=None,
        kms_key_name=None,
        generation=None,
    ):
        """Instantiate the blob.

        See the class docstring for the meaning of each argument.
        ``encryption_key`` (customer-supplied) and ``kms_key_name`` (Cloud
        KMS) are mutually exclusive; supplying both is rejected via
        ``_raise_if_more_than_one_set``.
        """
        # Blob names may be passed as bytes; normalize to unicode up front.
        name = _bytes_to_unicode(name)
        super(Blob, self).__init__(name=name)

        self.chunk_size = chunk_size  # Check that setter accepts value.
        self._bucket = bucket
        self._acl = ObjectACL(self)
        _raise_if_more_than_one_set(
            encryption_key=encryption_key, kms_key_name=kms_key_name
        )

        self._encryption_key = encryption_key

        # KMS key and generation are server-side resource properties, so they
        # are stored on the property mapping rather than as attributes.
        if kms_key_name is not None:
            self._properties["kmsKeyName"] = kms_key_name

        if generation is not None:
            self._properties["generation"] = generation
238
239 @property
240 def bucket(self):
241 """Bucket which contains the object.
242
243 :rtype: :class:`~google.cloud.storage.bucket.Bucket`
244 :returns: The object's bucket.
245 """
246 return self._bucket
247
248 @property
249 def chunk_size(self):
250 """Get the blob's default chunk size.
251
252 :rtype: int or ``NoneType``
253 :returns: The current blob's chunk size, if it is set.
254 """
255 return self._chunk_size
256
257 @chunk_size.setter
258 def chunk_size(self, value):
259 """Set the blob's default chunk size.
260
261 :type value: int
262 :param value: (Optional) The current blob's chunk size, if it is set.
263
264 :raises: :class:`ValueError` if ``value`` is not ``None`` and is not a
265 multiple of 256 KB.
266 """
267 if value is not None and value > 0 and value % self._CHUNK_SIZE_MULTIPLE != 0:
268 raise ValueError(
269 "Chunk size must be a multiple of %d." % (self._CHUNK_SIZE_MULTIPLE,)
270 )
271 self._chunk_size = value
272
273 @property
274 def encryption_key(self):
275 """Retrieve the customer-supplied encryption key for the object.
276
277 :rtype: bytes or ``NoneType``
278 :returns:
279 The encryption key or ``None`` if no customer-supplied encryption key was used,
280 or the blob's resource has not been loaded from the server.
281 """
282 return self._encryption_key
283
284 @encryption_key.setter
285 def encryption_key(self, value):
286 """Set the blob's encryption key.
287
288 See https://cloud.google.com/storage/docs/encryption#customer-supplied
289
290 To perform a key rotation for an encrypted blob, use :meth:`rewrite`.
291 See https://cloud.google.com/storage/docs/encryption/using-customer-supplied-keys?hl=ca#rotating
292
293 :type value: bytes
294 :param value: 32 byte encryption key for customer-supplied encryption.
295 """
296 self._encryption_key = value
297
298 @staticmethod
299 def path_helper(bucket_path, blob_name):
300 """Relative URL path for a blob.
301
302 :type bucket_path: str
303 :param bucket_path: The URL path for a bucket.
304
305 :type blob_name: str
306 :param blob_name: The name of the blob.
307
308 :rtype: str
309 :returns: The relative URL path for ``blob_name``.
310 """
311 return bucket_path + "/o/" + _quote(blob_name)
312
313 @property
314 def acl(self):
315 """Create our ACL on demand."""
316 return self._acl
317
318 def __repr__(self):
319 if self.bucket:
320 bucket_name = self.bucket.name
321 else:
322 bucket_name = None
323
324 return f"<Blob: {bucket_name}, {self.name}, {self.generation}>"
325
326 @property
327 def path(self):
328 """Getter property for the URL path to this Blob.
329
330 :rtype: str
331 :returns: The URL path to this Blob.
332 """
333 if not self.name:
334 raise ValueError("Cannot determine path without a blob name.")
335
336 return self.path_helper(self.bucket.path, self.name)
337
338 @property
339 def client(self):
340 """The client bound to this blob."""
341 return self.bucket.client
342
343 @property
344 def user_project(self):
345 """Project ID billed for API requests made via this blob.
346
347 Derived from bucket's value.
348
349 :rtype: str
350 """
351 return self.bucket.user_project
352
353 def _encryption_headers(self):
354 """Return any encryption headers needed to fetch the object.
355
356 :rtype: List(Tuple(str, str))
357 :returns: a list of tuples to be passed as headers.
358 """
359 return _get_encryption_headers(self._encryption_key)
360
361 @property
362 def _query_params(self):
363 """Default query parameters."""
364 params = {}
365 if self.generation is not None:
366 params["generation"] = self.generation
367 if self.user_project is not None:
368 params["userProject"] = self.user_project
369 return params
370
371 @property
372 def public_url(self):
373 """The public URL for this blob.
374
375 Use :meth:`make_public` to enable anonymous access via the returned
376 URL.
377
378 :rtype: `string`
379 :returns: The public URL for this blob.
380 """
381 if self.client:
382 endpoint = self.client.api_endpoint
383 else:
384 endpoint = _get_default_storage_base_url()
385 return "{storage_base_url}/{bucket_name}/{quoted_name}".format(
386 storage_base_url=endpoint,
387 bucket_name=self.bucket.name,
388 quoted_name=_quote(self.name, safe=b"/~"),
389 )
390
391 @classmethod
392 def from_uri(cls, uri, client=None):
393 """Get a constructor for blob object by URI.
394
395 .. code-block:: python
396
397 from google.cloud import storage
398 from google.cloud.storage.blob import Blob
399 client = storage.Client()
400 blob = Blob.from_uri("gs://bucket/object", client=client)
401
402 :type uri: str
403 :param uri: The blob uri following a gs://bucket/object pattern.
404 Both a bucket and object name is required to construct a blob object.
405
406 :type client: :class:`~google.cloud.storage.client.Client`
407 :param client:
408 (Optional) The client to use. Application code should
409 *always* pass ``client``.
410
411 :rtype: :class:`google.cloud.storage.blob.Blob`
412 :returns: The blob object created.
413 """
414 from google.cloud.storage.bucket import Bucket
415
416 match = _GS_URL_REGEX_PATTERN.match(uri)
417 if not match:
418 raise ValueError("URI pattern must be gs://bucket/object")
419 bucket = Bucket(client, name=match.group("bucket_name"))
420 return cls(match.group("object_name"), bucket)
421
422 @classmethod
423 def from_string(cls, uri, client=None):
424 """(Deprecated) Get a constructor for blob object by URI.
425
426 .. note::
427 Deprecated alias for :meth:`from_uri`.
428
429 .. code-block:: python
430
431 from google.cloud import storage
432 from google.cloud.storage.blob import Blob
433 client = storage.Client()
434 blob = Blob.from_string("gs://bucket/object", client=client)
435
436 :type uri: str
437 :param uri: The blob uri following a gs://bucket/object pattern.
438 Both a bucket and object name is required to construct a blob object.
439
440 :type client: :class:`~google.cloud.storage.client.Client`
441 :param client:
442 (Optional) The client to use. Application code should
443 *always* pass ``client``.
444
445 :rtype: :class:`google.cloud.storage.blob.Blob`
446 :returns: The blob object created.
447 """
448 warnings.warn(_FROM_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2)
449 return Blob.from_uri(uri=uri, client=client)
450
    def generate_signed_url(
        self,
        expiration=None,
        api_access_endpoint=None,
        method="GET",
        content_md5=None,
        content_type=None,
        response_disposition=None,
        response_type=None,
        generation=None,
        headers=None,
        query_parameters=None,
        client=None,
        credentials=None,
        version=None,
        service_account_email=None,
        access_token=None,
        virtual_hosted_style=False,
        bucket_bound_hostname=None,
        scheme="http",
    ):
        """Generates a signed URL for this blob.

        .. note::

            If you are on Google Compute Engine, you can't generate a signed
            URL using GCE service account.
            If you'd like to be able to generate a signed URL from GCE,
            you can use a standard service account from a JSON file rather
            than a GCE service account.

        If you have a blob that you want to allow access to for a set
        amount of time, you can use this method to generate a URL that
        is only valid within a certain time period.

        See a [code sample](https://cloud.google.com/storage/docs/samples/storage-generate-signed-url-v4#storage_generate_signed_url_v4-python).

        This is particularly useful if you don't want publicly
        accessible blobs, but don't want to require users to explicitly
        log in.

        If ``bucket_bound_hostname`` is set as an argument of :attr:`api_access_endpoint`,
        ``https`` works only if using a ``CDN``.

        :type expiration: Union[Integer, datetime.datetime, datetime.timedelta]
        :param expiration:
            Point in time when the signed URL should expire. If a ``datetime``
            instance is passed without an explicit ``tzinfo`` set, it will be
            assumed to be ``UTC``.

        :type api_access_endpoint: str
        :param api_access_endpoint: (Optional) URI base, for instance
            "https://storage.googleapis.com". If not specified, the client's
            api_endpoint will be used. Incompatible with bucket_bound_hostname.

        :type method: str
        :param method: The HTTP verb that will be used when requesting the URL.

        :type content_md5: str
        :param content_md5:
            (Optional) The MD5 hash of the object referenced by ``resource``.

        :type content_type: str
        :param content_type:
            (Optional) The content type of the object referenced by
            ``resource``.

        :type response_disposition: str
        :param response_disposition:
            (Optional) Content disposition of responses to requests for the
            signed URL. For example, to enable the signed URL to initiate a
            file of ``blog.png``, use the value ``'attachment;
            filename=blob.png'``.

        :type response_type: str
        :param response_type:
            (Optional) Content type of responses to requests for the signed
            URL. Ignored if content_type is set on object/blob metadata.

        :type generation: str
        :param generation:
            (Optional) A value that indicates which generation of the resource
            to fetch.

        :type headers: dict
        :param headers:
            (Optional) Additional HTTP headers to be included as part of the
            signed URLs. See:
            https://cloud.google.com/storage/docs/xml-api/reference-headers
            Requests using the signed URL *must* pass the specified header
            (name and value) with each request for the URL.

        :type query_parameters: dict
        :param query_parameters:
            (Optional) Additional query parameters to be included as part of the
            signed URLs. See:
            https://cloud.google.com/storage/docs/xml-api/reference-headers#query

        :type client: :class:`~google.cloud.storage.client.Client`
        :param client:
            (Optional) The client to use. If not passed, falls back to the
            ``client`` stored on the blob's bucket.

        :type credentials: :class:`google.auth.credentials.Credentials`
        :param credentials:
            (Optional) The authorization credentials to attach to requests.
            These credentials identify this application to the service. If
            none are specified, the client will attempt to ascertain the
            credentials from the environment.

        :type version: str
        :param version:
            (Optional) The version of signed credential to create. Must be one
            of 'v2' | 'v4'.

        :type service_account_email: str
        :param service_account_email:
            (Optional) E-mail address of the service account.

        :type access_token: str
        :param access_token: (Optional) Access token for a service account.

        :type virtual_hosted_style: bool
        :param virtual_hosted_style:
            (Optional) If true, then construct the URL relative the bucket's
            virtual hostname, e.g., '<bucket-name>.storage.googleapis.com'.
            Incompatible with bucket_bound_hostname.

        :type bucket_bound_hostname: str
        :param bucket_bound_hostname:
            (Optional) If passed, then construct the URL relative to the bucket-bound hostname.
            Value can be a bare or with scheme, e.g., 'example.com' or 'http://example.com'.
            Incompatible with api_access_endpoint and virtual_hosted_style.
            See: https://cloud.google.com/storage/docs/request-endpoints#cname

        :type scheme: str
        :param scheme:
            (Optional) If ``bucket_bound_hostname`` is passed as a bare
            hostname, use this value as the scheme. ``https`` will work only
            when using a CDN. Defaults to ``"http"``.

        :raises: :exc:`ValueError` when version is invalid or mutually exclusive arguments are used.
        :raises: :exc:`TypeError` when expiration is not a valid type.
        :raises: :exc:`AttributeError` if credentials is not an instance
                 of :class:`google.auth.credentials.Signing`.

        :rtype: str
        :returns: A signed URL you can use to access the resource
                  until expiration.
        """
        # Default to the (legacy) v2 signing scheme when unspecified.
        if version is None:
            version = "v2"
        elif version not in ("v2", "v4"):
            raise ValueError("'version' must be either 'v2' or 'v4'")

        # bucket_bound_hostname is mutually exclusive with both
        # api_access_endpoint and virtual_hosted_style.
        if (
            api_access_endpoint is not None or virtual_hosted_style
        ) and bucket_bound_hostname:
            raise ValueError(
                "The bucket_bound_hostname argument is not compatible with "
                "either api_access_endpoint or virtual_hosted_style."
            )

        if api_access_endpoint is None:
            client = self._require_client(client)
            api_access_endpoint = client.api_endpoint

        quoted_name = _quote(self.name, safe=b"/~")

        # If you are on Google Compute Engine, you can't generate a signed URL
        # using GCE service account.
        # See https://github.com/googleapis/google-auth-library-python/issues/50
        if virtual_hosted_style:
            # Bucket is encoded in the hostname, so the signed resource is
            # just the object path.
            api_access_endpoint = _virtual_hosted_style_base_url(
                api_access_endpoint, self.bucket.name
            )
            resource = f"/{quoted_name}"
        elif bucket_bound_hostname:
            api_access_endpoint = _bucket_bound_hostname_url(
                bucket_bound_hostname, scheme
            )
            resource = f"/{quoted_name}"
        else:
            # Path-style URL: the bucket name is part of the signed resource.
            resource = f"/{self.bucket.name}/{quoted_name}"

        if credentials is None:
            client = self._require_client(client)  # May be redundant, but that's ok.
            credentials = client._credentials

        client = self._require_client(client)
        universe_domain = client.universe_domain

        if version == "v2":
            helper = generate_signed_url_v2
        else:
            helper = generate_signed_url_v4

        # For customer-supplied encryption keys, the matching encryption
        # headers must be baked into the signature.
        if self._encryption_key is not None:
            encryption_headers = _get_encryption_headers(self._encryption_key)
            if headers is None:
                headers = {}
            if version == "v2":
                # See: https://cloud.google.com/storage/docs/access-control/signed-urls-v2#about-canonical-extension-headers
                v2_copy_only = "X-Goog-Encryption-Algorithm"
                headers[v2_copy_only] = encryption_headers[v2_copy_only]
            else:
                headers.update(encryption_headers)

        return helper(
            credentials,
            resource=resource,
            expiration=expiration,
            api_access_endpoint=api_access_endpoint,
            method=method.upper(),
            content_md5=content_md5,
            content_type=content_type,
            response_type=response_type,
            response_disposition=response_disposition,
            generation=generation,
            headers=headers,
            query_parameters=query_parameters,
            service_account_email=service_account_email,
            access_token=access_token,
            universe_domain=universe_domain,
        )
676
    def exists(
        self,
        client=None,
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        soft_deleted=None,
    ):
        """Determines whether or not this blob exists.

        If :attr:`user_project` is set on the bucket, bills the API request
        to that project.

        :type client: :class:`~google.cloud.storage.client.Client`
        :param client:
            (Optional) The client to use. If not passed, falls back to the
            ``client`` stored on the blob's bucket.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match:
            (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match:
            (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response. See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type soft_deleted: bool
        :param soft_deleted:
            (Optional) If True, looks for a soft-deleted object. Will only return True
            if the object exists and is in a soft-deleted state.
            :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
            See: https://cloud.google.com/storage/docs/soft-delete

        :rtype: bool
        :returns: True if the blob exists in Cloud Storage.
        """
        with create_trace_span(name="Storage.Blob.exists"):
            client = self._require_client(client)
            # We only need the status code (200 or not) so we seek to
            # minimize the returned payload.
            query_params = self._query_params
            query_params["fields"] = "name"
            # softDeleted is only sent when explicitly requested (True/False);
            # None leaves the parameter out entirely.
            if soft_deleted is not None:
                query_params["softDeleted"] = soft_deleted

            # Generation / metageneration preconditions travel as query
            # parameters; etag preconditions travel as request headers.
            _add_generation_match_parameters(
                query_params,
                if_generation_match=if_generation_match,
                if_generation_not_match=if_generation_not_match,
                if_metageneration_match=if_metageneration_match,
                if_metageneration_not_match=if_metageneration_not_match,
            )

            headers = {}
            _add_etag_match_headers(
                headers,
                if_etag_match=if_etag_match,
                if_etag_not_match=if_etag_not_match,
            )

            try:
                # We intentionally pass `_target_object=None` since fields=name
                # would limit the local properties.
                client._get_resource(
                    self.path,
                    query_params=query_params,
                    headers=headers,
                    timeout=timeout,
                    retry=retry,
                    _target_object=None,
                )
            except NotFound:
                # NOTE: This will not fail immediately in a batch. However, when
                # Batch.finish() is called, the resulting `NotFound` will be
                # raised.
                return False
            return True
784
    def delete(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
    ):
        """Deletes a blob from Cloud Storage.

        If :attr:`user_project` is set on the bucket, bills the API request
        to that project.

        :type client: :class:`~google.cloud.storage.client.Client`
        :param client:
            (Optional) The client to use. If not passed, falls back to the
            ``client`` stored on the blob's bucket.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response. See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry: (Optional) How to retry the RPC. A None value will disable
            retries. A google.api_core.retry.Retry value will enable retries,
            and the object will define retriable response codes and errors and
            configure backoff and timeout options.

            A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
            Retry object and activates it only if certain conditions are met.
            This class exists to provide safe defaults for RPC calls that are
            not technically safe to retry normally (due to potential data
            duplication or other side-effects) but become safe to retry if a
            condition such as if_generation_match is set.

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

        :raises: :class:`google.cloud.exceptions.NotFound`
                 (propagated from
                 :meth:`google.cloud.storage.bucket.Bucket.delete_blob`).
        """
        with create_trace_span(name="Storage.Blob.delete"):
            # Delegate to the bucket.  The blob's currently-known generation
            # (possibly None) is forwarded so that a specific revision is
            # targeted when one has been loaded or set.
            self.bucket.delete_blob(
                self.name,
                client=client,
                generation=self.generation,
                timeout=timeout,
                if_generation_match=if_generation_match,
                if_generation_not_match=if_generation_not_match,
                if_metageneration_match=if_metageneration_match,
                if_metageneration_not_match=if_metageneration_not_match,
                retry=retry,
            )
859
860 def _get_transport(self, client):
861 """Return the client's transport.
862
863 :type client: :class:`~google.cloud.storage.client.Client`
864 :param client:
865 (Optional) The client to use. If not passed, falls back to the
866 ``client`` stored on the blob's bucket.
867
868 :rtype transport:
869 :class:`~google.auth.transport.requests.AuthorizedSession`
870 :returns: The transport (with credentials) that will
871 make authenticated requests.
872 """
873 client = self._require_client(client)
874 return client._http
875
    def _get_download_url(
        self,
        client,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
    ):
        """Get the download URL for the current blob.

        If the ``media_link`` has been loaded, it will be used, otherwise
        the URL will be constructed from the current blob's path (and possibly
        generation) to avoid a round trip.

        :type client: :class:`~google.cloud.storage.client.Client`
        :param client: The client to use.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :rtype: str
        :returns: The download URL for the current blob.
        """
        name_value_pairs = []
        if self.media_link is None:
            # No media link cached: build the URL from the blob's path, and
            # pin the generation on it when one is known.
            hostname = _get_host_name(client._connection)
            base_url = _DOWNLOAD_URL_TEMPLATE.format(
                hostname=hostname, path=self.path, api_version=_API_VERSION
            )
            if self.generation is not None:
                # ":d" guarantees an integer rendering of the generation.
                name_value_pairs.append(("generation", f"{self.generation:d}"))
        else:
            base_url = self.media_link

        if self.user_project is not None:
            name_value_pairs.append(("userProject", self.user_project))

        # Preconditions are appended as additional query parameters.
        _add_generation_match_parameters(
            name_value_pairs,
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )
        return _add_query_parameters(base_url, name_value_pairs)
934
935 def _extract_headers_from_download(self, response):
936 """Extract headers from a non-chunked request's http object.
937
938 This avoids the need to make a second request for commonly used
939 headers.
940
941 :type response:
942 :class requests.models.Response
943 :param response: The server response from downloading a non-chunked file
944 """
945 self._properties["contentEncoding"] = response.headers.get(
946 "Content-Encoding", None
947 )
948 self._properties[_CONTENT_TYPE_FIELD] = response.headers.get(
949 "Content-Type", None
950 )
951 self._properties["cacheControl"] = response.headers.get("Cache-Control", None)
952 self._properties["storageClass"] = response.headers.get(
953 "X-Goog-Storage-Class", None
954 )
955 self._properties["contentLanguage"] = response.headers.get(
956 "Content-Language", None
957 )
958 self._properties["etag"] = response.headers.get("ETag", None)
959 self._properties["generation"] = response.headers.get("X-goog-generation", None)
960 self._properties["metageneration"] = response.headers.get(
961 "X-goog-metageneration", None
962 )
963 # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
964 x_goog_hash = response.headers.get("X-Goog-Hash", "")
965
966 if x_goog_hash:
967 digests = {}
968 for encoded_digest in x_goog_hash.split(","):
969 match = re.match(r"(crc32c|md5)=([\w\d/\+/]+={0,3})", encoded_digest)
970 if match:
971 method, digest = match.groups()
972 digests[method] = digest
973
974 self._properties["crc32c"] = digests.get("crc32c", None)
975 self._properties["md5Hash"] = digests.get("md5", None)
976
977 def _do_download(
978 self,
979 transport,
980 file_obj,
981 download_url,
982 headers,
983 start=None,
984 end=None,
985 raw_download=False,
986 timeout=_DEFAULT_TIMEOUT,
987 checksum="auto",
988 retry=DEFAULT_RETRY,
989 single_shot_download=False,
990 ):
991 """Perform a download without any error handling.
992
993 This is intended to be called by :meth:`_prep_and_do_download` so it can
994 be wrapped with error handling / remapping.
995
996 :type transport:
997 :class:`~google.auth.transport.requests.AuthorizedSession`
998 :param transport:
999 The transport (with credentials) that will make authenticated
1000 requests.
1001
1002 :type file_obj: file
1003 :param file_obj: A file handle to which to write the blob's data.
1004
1005 :type download_url: str
1006 :param download_url: The URL where the media can be accessed.
1007
1008 :type headers: dict
1009 :param headers: Headers to be sent with the request(s).
1010
1011 :type start: int
1012 :param start: (Optional) The first byte in a range to be downloaded.
1013
1014 :type end: int
1015 :param end: (Optional) The last byte in a range to be downloaded.
1016
1017 :type raw_download: bool
1018 :param raw_download:
1019 (Optional) If true, download the object without any expansion.
1020
1021 :type timeout: float or tuple
1022 :param timeout:
1023 (Optional) The amount of time, in seconds, to wait
1024 for the server response. See: :ref:`configuring_timeouts`
1025
1026 :type checksum: str
1027 :param checksum:
1028 (Optional) The type of checksum to compute to verify the integrity
1029 of the object. The response headers must contain a checksum of the
1030 requested type. If the headers lack an appropriate checksum (for
1031 instance in the case of transcoded or ranged downloads where the
1032 remote service does not know the correct checksum, including
1033 downloads where chunk_size is set) an INFO-level log will be
1034 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1035 default is "auto", which will try to detect if the C extension for
1036 crc32c is installed and fall back to md5 otherwise.
1037
1038 :type retry: google.api_core.retry.Retry
1039 :param retry: (Optional) How to retry the RPC. A None value will disable
1040 retries. A google.api_core.retry.Retry value will enable retries,
1041 and the object will configure backoff and timeout options.
1042
1043 This private method does not accept ConditionalRetryPolicy values
1044 because the information necessary to evaluate the policy is instead
1045 evaluated in blob._prep_and_do_download().
1046
1047 See the retry.py source code and docstrings in this package
1048 (google.cloud.storage.retry) for information on retry types and how
1049 to configure them.
1050
1051 :type single_shot_download: bool
1052 :param single_shot_download:
1053 (Optional) If true, download the object in a single request.
1054 Caution: Enabling this will increase the memory overload for your application.
1055 Please enable this as per your use case.
1056 """
1057
1058 extra_attributes = {
1059 "url.full": download_url,
1060 "download.chunk_size": f"{self.chunk_size}",
1061 "download.raw_download": raw_download,
1062 "upload.checksum": f"{checksum}",
1063 "download.single_shot_download": single_shot_download,
1064 }
1065 args = {"timeout": timeout}
1066
1067 if self.chunk_size is None:
1068 if raw_download:
1069 klass = RawDownload
1070 download_class = "RawDownload"
1071 else:
1072 klass = Download
1073 download_class = "Download"
1074
1075 download = klass(
1076 download_url,
1077 stream=file_obj,
1078 headers=headers,
1079 start=start,
1080 end=end,
1081 checksum=checksum,
1082 retry=retry,
1083 # NOTE: single_shot_download is only supported in Download and RawDownload
1084 # classes, i.e., when chunk_size is set to None (the default value). It is
1085 # not supported for chunked downloads.
1086 single_shot_download=single_shot_download,
1087 )
1088 with create_trace_span(
1089 name=f"Storage.{download_class}/consume",
1090 attributes=extra_attributes,
1091 api_request=args,
1092 ):
1093 response = download.consume(transport, timeout=timeout)
1094 self._extract_headers_from_download(response)
1095 else:
1096 if checksum:
1097 msg = _CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE.format(checksum)
1098 _logger.info(msg)
1099
1100 if raw_download:
1101 klass = RawChunkedDownload
1102 download_class = "RawChunkedDownload"
1103 else:
1104 klass = ChunkedDownload
1105 download_class = "ChunkedDownload"
1106
1107 download = klass(
1108 download_url,
1109 self.chunk_size,
1110 file_obj,
1111 headers=headers,
1112 start=start if start else 0,
1113 end=end,
1114 retry=retry,
1115 )
1116
1117 with create_trace_span(
1118 name=f"Storage.{download_class}/consumeNextChunk",
1119 attributes=extra_attributes,
1120 api_request=args,
1121 ):
1122 while not download.finished:
1123 download.consume_next_chunk(transport, timeout=timeout)
1124
1125 def download_to_file(
1126 self,
1127 file_obj,
1128 client=None,
1129 start=None,
1130 end=None,
1131 raw_download=False,
1132 if_etag_match=None,
1133 if_etag_not_match=None,
1134 if_generation_match=None,
1135 if_generation_not_match=None,
1136 if_metageneration_match=None,
1137 if_metageneration_not_match=None,
1138 timeout=_DEFAULT_TIMEOUT,
1139 checksum="auto",
1140 retry=DEFAULT_RETRY,
1141 single_shot_download=False,
1142 ):
1143 """Download the contents of this blob into a file-like object.
1144
1145 .. note::
1146
1147 If the server-set property, :attr:`media_link`, is not yet
1148 initialized, makes an additional API request to load it.
1149
1150 If the :attr:`chunk_size` of a current blob is `None`, will download data
1151 in single download request otherwise it will download the :attr:`chunk_size`
1152 of data in each request.
1153
1154 For more fine-grained control over the download process, check out
1155 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
1156 For example, this library allows downloading **parts** of a blob rather than the whole thing.
1157
1158 If :attr:`user_project` is set on the bucket, bills the API request
1159 to that project.
1160
1161 :type file_obj: file
1162 :param file_obj: A file handle to which to write the blob's data.
1163
1164 :type client: :class:`~google.cloud.storage.client.Client`
1165 :param client:
1166 (Optional) The client to use. If not passed, falls back to the
1167 ``client`` stored on the blob's bucket.
1168
1169 :type start: int
1170 :param start: (Optional) The first byte in a range to be downloaded.
1171
1172 :type end: int
1173 :param end: (Optional) The last byte in a range to be downloaded.
1174
1175 :type raw_download: bool
1176 :param raw_download:
1177 (Optional) If true, download the object without any expansion.
1178
1179 :type if_etag_match: Union[str, Set[str]]
1180 :param if_etag_match:
1181 (Optional) See :ref:`using-if-etag-match`
1182
1183 :type if_etag_not_match: Union[str, Set[str]]
1184 :param if_etag_not_match:
1185 (Optional) See :ref:`using-if-etag-not-match`
1186
1187 :type if_generation_match: long
1188 :param if_generation_match:
1189 (Optional) See :ref:`using-if-generation-match`
1190
1191 :type if_generation_not_match: long
1192 :param if_generation_not_match:
1193 (Optional) See :ref:`using-if-generation-not-match`
1194
1195 :type if_metageneration_match: long
1196 :param if_metageneration_match:
1197 (Optional) See :ref:`using-if-metageneration-match`
1198
1199 :type if_metageneration_not_match: long
1200 :param if_metageneration_not_match:
1201 (Optional) See :ref:`using-if-metageneration-not-match`
1202
1203 :type timeout: float or tuple
1204 :param timeout:
1205 (Optional) The amount of time, in seconds, to wait
1206 for the server response. See: :ref:`configuring_timeouts`
1207
1208 :type checksum: str
1209 :param checksum:
1210 (Optional) The type of checksum to compute to verify the integrity
1211 of the object. The response headers must contain a checksum of the
1212 requested type. If the headers lack an appropriate checksum (for
1213 instance in the case of transcoded or ranged downloads where the
1214 remote service does not know the correct checksum, including
1215 downloads where chunk_size is set) an INFO-level log will be
1216 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1217 default is "auto", which will try to detect if the C extension for
1218 crc32c is installed and fall back to md5 otherwise.
1219
1220 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1221 :param retry: (Optional) How to retry the RPC. A None value will disable
1222 retries. A google.api_core.retry.Retry value will enable retries,
1223 and the object will define retriable response codes and errors and
1224 configure backoff and timeout options.
1225
1226 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1227 Retry object and activates it only if certain conditions are met.
1228 This class exists to provide safe defaults for RPC calls that are
1229 not technically safe to retry normally (due to potential data
1230 duplication or other side-effects) but become safe to retry if a
1231 condition such as if_metageneration_match is set.
1232
1233 See the retry.py source code and docstrings in this package
1234 (google.cloud.storage.retry) for information on retry types and how
1235 to configure them.
1236
1237 :type single_shot_download: bool
1238 :param single_shot_download:
1239 (Optional) If true, download the object in a single request.
1240 Caution: Enabling this will increase the memory overload for your application.
1241 Please enable this as per your use case.
1242
1243 :raises: :class:`google.cloud.exceptions.NotFound`
1244 """
1245 with create_trace_span(name="Storage.Blob.downloadToFile"):
1246 self._prep_and_do_download(
1247 file_obj,
1248 client=client,
1249 start=start,
1250 end=end,
1251 raw_download=raw_download,
1252 if_etag_match=if_etag_match,
1253 if_etag_not_match=if_etag_not_match,
1254 if_generation_match=if_generation_match,
1255 if_generation_not_match=if_generation_not_match,
1256 if_metageneration_match=if_metageneration_match,
1257 if_metageneration_not_match=if_metageneration_not_match,
1258 timeout=timeout,
1259 checksum=checksum,
1260 retry=retry,
1261 single_shot_download=single_shot_download,
1262 )
1263
1264 def _handle_filename_and_download(self, filename, *args, **kwargs):
1265 """Download the contents of this blob into a named file.
1266
1267 :type filename: str
1268 :param filename: A filename to be passed to ``open``.
1269
1270 For *args and **kwargs, refer to the documentation for download_to_filename() for more information.
1271 """
1272
1273 try:
1274 with open(filename, "wb") as file_obj:
1275 self._prep_and_do_download(
1276 file_obj,
1277 *args,
1278 **kwargs,
1279 )
1280
1281 except (DataCorruption, NotFound):
1282 # Delete the corrupt or empty downloaded file.
1283 os.remove(filename)
1284 raise
1285
1286 updated = self.updated
1287 if updated is not None:
1288 mtime = updated.timestamp()
1289 os.utime(file_obj.name, (mtime, mtime))
1290
1291 def download_to_filename(
1292 self,
1293 filename,
1294 client=None,
1295 start=None,
1296 end=None,
1297 raw_download=False,
1298 if_etag_match=None,
1299 if_etag_not_match=None,
1300 if_generation_match=None,
1301 if_generation_not_match=None,
1302 if_metageneration_match=None,
1303 if_metageneration_not_match=None,
1304 timeout=_DEFAULT_TIMEOUT,
1305 checksum="auto",
1306 retry=DEFAULT_RETRY,
1307 single_shot_download=False,
1308 ):
1309 """Download the contents of this blob into a named file.
1310
1311 If :attr:`user_project` is set on the bucket, bills the API request
1312 to that project.
1313
1314 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-download-encrypted-file#storage_download_encrypted_file-python)
1315 to download a file with a [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).
1316
1317 :type filename: str
1318 :param filename: A filename to be passed to ``open``.
1319
1320 :type client: :class:`~google.cloud.storage.client.Client`
1321 :param client:
1322 (Optional) The client to use. If not passed, falls back to the
1323 ``client`` stored on the blob's bucket.
1324
1325 :type start: int
1326 :param start: (Optional) The first byte in a range to be downloaded.
1327
1328 :type end: int
1329 :param end: (Optional) The last byte in a range to be downloaded.
1330
1331 :type raw_download: bool
1332 :param raw_download:
1333 (Optional) If true, download the object without any expansion.
1334
1335 :type if_etag_match: Union[str, Set[str]]
1336 :param if_etag_match:
1337 (Optional) See :ref:`using-if-etag-match`
1338
1339 :type if_etag_not_match: Union[str, Set[str]]
1340 :param if_etag_not_match:
1341 (Optional) See :ref:`using-if-etag-not-match`
1342
1343 :type if_generation_match: long
1344 :param if_generation_match:
1345 (Optional) See :ref:`using-if-generation-match`
1346
1347 :type if_generation_not_match: long
1348 :param if_generation_not_match:
1349 (Optional) See :ref:`using-if-generation-not-match`
1350
1351 :type if_metageneration_match: long
1352 :param if_metageneration_match:
1353 (Optional) See :ref:`using-if-metageneration-match`
1354
1355 :type if_metageneration_not_match: long
1356 :param if_metageneration_not_match:
1357 (Optional) See :ref:`using-if-metageneration-not-match`
1358
1359 :type timeout: float or tuple
1360 :param timeout:
1361 (Optional) The amount of time, in seconds, to wait
1362 for the server response. See: :ref:`configuring_timeouts`
1363
1364 :type checksum: str
1365 :param checksum:
1366 (Optional) The type of checksum to compute to verify the integrity
1367 of the object. The response headers must contain a checksum of the
1368 requested type. If the headers lack an appropriate checksum (for
1369 instance in the case of transcoded or ranged downloads where the
1370 remote service does not know the correct checksum, including
1371 downloads where chunk_size is set) an INFO-level log will be
1372 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1373 default is "auto", which will try to detect if the C extension for
1374 crc32c is installed and fall back to md5 otherwise.
1375
1376 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1377 :param retry: (Optional) How to retry the RPC. A None value will disable
1378 retries. A google.api_core.retry.Retry value will enable retries,
1379 and the object will define retriable response codes and errors and
1380 configure backoff and timeout options.
1381
1382 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1383 Retry object and activates it only if certain conditions are met.
1384 This class exists to provide safe defaults for RPC calls that are
1385 not technically safe to retry normally (due to potential data
1386 duplication or other side-effects) but become safe to retry if a
1387 condition such as if_metageneration_match is set.
1388
1389 See the retry.py source code and docstrings in this package
1390 (google.cloud.storage.retry) for information on retry types and how
1391 to configure them.
1392
1393 :type single_shot_download: bool
1394 :param single_shot_download:
1395 (Optional) If true, download the object in a single request.
1396 Caution: Enabling this will increase the memory overload for your application.
1397 Please enable this as per your use case.
1398
1399 :raises: :class:`google.cloud.exceptions.NotFound`
1400 """
1401 with create_trace_span(name="Storage.Blob.downloadToFilename"):
1402 self._handle_filename_and_download(
1403 filename,
1404 client=client,
1405 start=start,
1406 end=end,
1407 raw_download=raw_download,
1408 if_etag_match=if_etag_match,
1409 if_etag_not_match=if_etag_not_match,
1410 if_generation_match=if_generation_match,
1411 if_generation_not_match=if_generation_not_match,
1412 if_metageneration_match=if_metageneration_match,
1413 if_metageneration_not_match=if_metageneration_not_match,
1414 timeout=timeout,
1415 checksum=checksum,
1416 retry=retry,
1417 single_shot_download=single_shot_download,
1418 )
1419
1420 def download_as_bytes(
1421 self,
1422 client=None,
1423 start=None,
1424 end=None,
1425 raw_download=False,
1426 if_etag_match=None,
1427 if_etag_not_match=None,
1428 if_generation_match=None,
1429 if_generation_not_match=None,
1430 if_metageneration_match=None,
1431 if_metageneration_not_match=None,
1432 timeout=_DEFAULT_TIMEOUT,
1433 checksum="auto",
1434 retry=DEFAULT_RETRY,
1435 single_shot_download=False,
1436 ):
1437 """Download the contents of this blob as a bytes object.
1438
1439 If :attr:`user_project` is set on the bucket, bills the API request
1440 to that project.
1441
1442 :type client: :class:`~google.cloud.storage.client.Client`
1443 :param client:
1444 (Optional) The client to use. If not passed, falls back to the
1445 ``client`` stored on the blob's bucket.
1446
1447 :type start: int
1448 :param start: (Optional) The first byte in a range to be downloaded.
1449
1450 :type end: int
1451 :param end: (Optional) The last byte in a range to be downloaded.
1452
1453 :type raw_download: bool
1454 :param raw_download:
1455 (Optional) If true, download the object without any expansion.
1456
1457 :type if_etag_match: Union[str, Set[str]]
1458 :param if_etag_match:
1459 (Optional) See :ref:`using-if-etag-match`
1460
1461 :type if_etag_not_match: Union[str, Set[str]]
1462 :param if_etag_not_match:
1463 (Optional) See :ref:`using-if-etag-not-match`
1464
1465 :type if_generation_match: long
1466 :param if_generation_match:
1467 (Optional) See :ref:`using-if-generation-match`
1468
1469 :type if_generation_not_match: long
1470 :param if_generation_not_match:
1471 (Optional) See :ref:`using-if-generation-not-match`
1472
1473 :type if_metageneration_match: long
1474 :param if_metageneration_match:
1475 (Optional) See :ref:`using-if-metageneration-match`
1476
1477 :type if_metageneration_not_match: long
1478 :param if_metageneration_not_match:
1479 (Optional) See :ref:`using-if-metageneration-not-match`
1480
1481 :type timeout: float or tuple
1482 :param timeout:
1483 (Optional) The amount of time, in seconds, to wait
1484 for the server response. See: :ref:`configuring_timeouts`
1485
1486 :type checksum: str
1487 :param checksum:
1488 (Optional) The type of checksum to compute to verify the integrity
1489 of the object. The response headers must contain a checksum of the
1490 requested type. If the headers lack an appropriate checksum (for
1491 instance in the case of transcoded or ranged downloads where the
1492 remote service does not know the correct checksum, including
1493 downloads where chunk_size is set) an INFO-level log will be
1494 emitted. Supported values are "md5", "crc32c", "auto" and None. The
1495 default is "auto", which will try to detect if the C extension for
1496 crc32c is installed and fall back to md5 otherwise.
1497
1498 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1499 :param retry: (Optional) How to retry the RPC. A None value will disable
1500 retries. A google.api_core.retry.Retry value will enable retries,
1501 and the object will define retriable response codes and errors and
1502 configure backoff and timeout options.
1503
1504 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1505 Retry object and activates it only if certain conditions are met.
1506 This class exists to provide safe defaults for RPC calls that are
1507 not technically safe to retry normally (due to potential data
1508 duplication or other side-effects) but become safe to retry if a
1509 condition such as if_metageneration_match is set.
1510
1511 See the retry.py source code and docstrings in this package
1512 (google.cloud.storage.retry) for information on retry types and how
1513 to configure them.
1514
1515 :type single_shot_download: bool
1516 :param single_shot_download:
1517 (Optional) If true, download the object in a single request.
1518 Caution: Enabling this will increase the memory overload for your application.
1519 Please enable this as per your use case.
1520
1521 :rtype: bytes
1522 :returns: The data stored in this blob.
1523
1524 :raises: :class:`google.cloud.exceptions.NotFound`
1525 """
1526 with create_trace_span(name="Storage.Blob.downloadAsBytes"):
1527 string_buffer = BytesIO()
1528
1529 self._prep_and_do_download(
1530 string_buffer,
1531 client=client,
1532 start=start,
1533 end=end,
1534 raw_download=raw_download,
1535 if_etag_match=if_etag_match,
1536 if_etag_not_match=if_etag_not_match,
1537 if_generation_match=if_generation_match,
1538 if_generation_not_match=if_generation_not_match,
1539 if_metageneration_match=if_metageneration_match,
1540 if_metageneration_not_match=if_metageneration_not_match,
1541 timeout=timeout,
1542 checksum=checksum,
1543 retry=retry,
1544 single_shot_download=single_shot_download,
1545 )
1546 return string_buffer.getvalue()
1547
1548 def download_as_string(
1549 self,
1550 client=None,
1551 start=None,
1552 end=None,
1553 raw_download=False,
1554 if_etag_match=None,
1555 if_etag_not_match=None,
1556 if_generation_match=None,
1557 if_generation_not_match=None,
1558 if_metageneration_match=None,
1559 if_metageneration_not_match=None,
1560 timeout=_DEFAULT_TIMEOUT,
1561 retry=DEFAULT_RETRY,
1562 single_shot_download=False,
1563 ):
1564 """(Deprecated) Download the contents of this blob as a bytes object.
1565
1566 If :attr:`user_project` is set on the bucket, bills the API request
1567 to that project.
1568
1569 .. note::
1570 Deprecated alias for :meth:`download_as_bytes`.
1571
1572 :type client: :class:`~google.cloud.storage.client.Client`
1573 :param client:
1574 (Optional) The client to use. If not passed, falls back to the
1575 ``client`` stored on the blob's bucket.
1576
1577 :type start: int
1578 :param start: (Optional) The first byte in a range to be downloaded.
1579
1580 :type end: int
1581 :param end: (Optional) The last byte in a range to be downloaded.
1582
1583 :type raw_download: bool
1584 :param raw_download:
1585 (Optional) If true, download the object without any expansion.
1586
1587 :type if_etag_match: Union[str, Set[str]]
1588 :param if_etag_match:
1589 (Optional) See :ref:`using-if-etag-match`
1590
1591 :type if_etag_not_match: Union[str, Set[str]]
1592 :param if_etag_not_match:
1593 (Optional) See :ref:`using-if-etag-not-match`
1594
1595 :type if_generation_match: long
1596 :param if_generation_match:
1597 (Optional) See :ref:`using-if-generation-match`
1598
1599 :type if_generation_not_match: long
1600 :param if_generation_not_match:
1601 (Optional) See :ref:`using-if-generation-not-match`
1602
1603 :type if_metageneration_match: long
1604 :param if_metageneration_match:
1605 (Optional) See :ref:`using-if-metageneration-match`
1606
1607 :type if_metageneration_not_match: long
1608 :param if_metageneration_not_match:
1609 (Optional) See :ref:`using-if-metageneration-not-match`
1610
1611 :type timeout: float or tuple
1612 :param timeout:
1613 (Optional) The amount of time, in seconds, to wait
1614 for the server response. See: :ref:`configuring_timeouts`
1615
1616 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1617 :param retry: (Optional) How to retry the RPC. A None value will disable
1618 retries. A google.api_core.retry.Retry value will enable retries,
1619 and the object will define retriable response codes and errors and
1620 configure backoff and timeout options.
1621
1622 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1623 Retry object and activates it only if certain conditions are met.
1624 This class exists to provide safe defaults for RPC calls that are
1625 not technically safe to retry normally (due to potential data
1626 duplication or other side-effects) but become safe to retry if a
1627 condition such as if_metageneration_match is set.
1628
1629 See the retry.py source code and docstrings in this package
1630 (google.cloud.storage.retry) for information on retry types and how
1631 to configure them.
1632
1633 :type single_shot_download: bool
1634 :param single_shot_download:
1635 (Optional) If true, download the object in a single request.
1636 Caution: Enabling this will increase the memory overload for your application.
1637 Please enable this as per your use case.
1638
1639 :rtype: bytes
1640 :returns: The data stored in this blob.
1641
1642 :raises: :class:`google.cloud.exceptions.NotFound`
1643 """
1644 warnings.warn(
1645 _DOWNLOAD_AS_STRING_DEPRECATED,
1646 PendingDeprecationWarning,
1647 stacklevel=2,
1648 )
1649 with create_trace_span(name="Storage.Blob.downloadAsString"):
1650 return self.download_as_bytes(
1651 client=client,
1652 start=start,
1653 end=end,
1654 raw_download=raw_download,
1655 if_etag_match=if_etag_match,
1656 if_etag_not_match=if_etag_not_match,
1657 if_generation_match=if_generation_match,
1658 if_generation_not_match=if_generation_not_match,
1659 if_metageneration_match=if_metageneration_match,
1660 if_metageneration_not_match=if_metageneration_not_match,
1661 timeout=timeout,
1662 retry=retry,
1663 single_shot_download=single_shot_download,
1664 )
1665
1666 def download_as_text(
1667 self,
1668 client=None,
1669 start=None,
1670 end=None,
1671 raw_download=False,
1672 encoding=None,
1673 if_etag_match=None,
1674 if_etag_not_match=None,
1675 if_generation_match=None,
1676 if_generation_not_match=None,
1677 if_metageneration_match=None,
1678 if_metageneration_not_match=None,
1679 timeout=_DEFAULT_TIMEOUT,
1680 retry=DEFAULT_RETRY,
1681 single_shot_download=False,
1682 ):
1683 """Download the contents of this blob as text (*not* bytes).
1684
1685 If :attr:`user_project` is set on the bucket, bills the API request
1686 to that project.
1687
1688 :type client: :class:`~google.cloud.storage.client.Client`
1689 :param client:
1690 (Optional) The client to use. If not passed, falls back to the
1691 ``client`` stored on the blob's bucket.
1692
1693 :type start: int
1694 :param start: (Optional) The first byte in a range to be downloaded.
1695
1696 :type end: int
1697 :param end: (Optional) The last byte in a range to be downloaded.
1698
1699 :type raw_download: bool
1700 :param raw_download:
1701 (Optional) If true, download the object without any expansion.
1702
1703 :type encoding: str
1704 :param encoding: (Optional) encoding to be used to decode the
1705 downloaded bytes. Defaults to the ``charset`` param of
1706 attr:`content_type`, or else to "utf-8".
1707
1708 :type if_etag_match: Union[str, Set[str]]
1709 :param if_etag_match:
1710 (Optional) See :ref:`using-if-etag-match`
1711
1712 :type if_etag_not_match: Union[str, Set[str]]
1713 :param if_etag_not_match:
1714 (Optional) See :ref:`using-if-etag-not-match`
1715
1716 :type if_generation_match: long
1717 :param if_generation_match:
1718 (Optional) See :ref:`using-if-generation-match`
1719
1720 :type if_generation_not_match: long
1721 :param if_generation_not_match:
1722 (Optional) See :ref:`using-if-generation-not-match`
1723
1724 :type if_metageneration_match: long
1725 :param if_metageneration_match:
1726 (Optional) See :ref:`using-if-metageneration-match`
1727
1728 :type if_metageneration_not_match: long
1729 :param if_metageneration_not_match:
1730 (Optional) See :ref:`using-if-metageneration-not-match`
1731
1732 :type timeout: float or tuple
1733 :param timeout:
1734 (Optional) The amount of time, in seconds, to wait
1735 for the server response. See: :ref:`configuring_timeouts`
1736
1737 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1738 :param retry: (Optional) How to retry the RPC. A None value will disable
1739 retries. A google.api_core.retry.Retry value will enable retries,
1740 and the object will define retriable response codes and errors and
1741 configure backoff and timeout options.
1742
1743 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1744 Retry object and activates it only if certain conditions are met.
1745 This class exists to provide safe defaults for RPC calls that are
1746 not technically safe to retry normally (due to potential data
1747 duplication or other side-effects) but become safe to retry if a
1748 condition such as if_metageneration_match is set.
1749
1750 See the retry.py source code and docstrings in this package
1751 (google.cloud.storage.retry) for information on retry types and how
1752 to configure them.
1753
1754 :type single_shot_download: bool
1755 :param single_shot_download:
1756 (Optional) If true, download the object in a single request.
1757 Caution: Enabling this will increase the memory overload for your application.
1758 Please enable this as per your use case.
1759
1760 :rtype: text
1761 :returns: The data stored in this blob, decoded to text.
1762 """
1763 with create_trace_span(name="Storage.Blob.downloadAsText"):
1764 data = self.download_as_bytes(
1765 client=client,
1766 start=start,
1767 end=end,
1768 raw_download=raw_download,
1769 if_etag_match=if_etag_match,
1770 if_etag_not_match=if_etag_not_match,
1771 if_generation_match=if_generation_match,
1772 if_generation_not_match=if_generation_not_match,
1773 if_metageneration_match=if_metageneration_match,
1774 if_metageneration_not_match=if_metageneration_not_match,
1775 timeout=timeout,
1776 retry=retry,
1777 single_shot_download=single_shot_download,
1778 )
1779
1780 if encoding is not None:
1781 return data.decode(encoding)
1782
1783 if self.content_type is not None:
1784 msg = HeaderParser().parsestr("Content-Type: " + self.content_type)
1785 params = dict(msg.get_params()[1:])
1786 if "charset" in params:
1787 return data.decode(params["charset"])
1788
1789 return data.decode("utf-8")
1790
1791 def _get_content_type(self, content_type, filename=None):
1792 """Determine the content type from the current object.
1793
1794 The return value will be determined in order of precedence:
1795
1796 - The value passed in to this method (if not :data:`None`)
1797 - The value stored on the current blob
1798 - The default value ('application/octet-stream')
1799
1800 :type content_type: str
1801 :param content_type: (Optional) Type of content.
1802
1803 :type filename: str
1804 :param filename:
1805 (Optional) The name of the file where the content is stored.
1806
1807 :rtype: str
1808 :returns: Type of content gathered from the object.
1809 """
1810 if content_type is None:
1811 content_type = self.content_type
1812
1813 if content_type is None and filename is not None:
1814 content_type, _ = mimetypes.guess_type(filename)
1815
1816 if content_type is None:
1817 content_type = _DEFAULT_CONTENT_TYPE
1818
1819 return content_type
1820
1821 def _get_writable_metadata(self):
1822 """Get the object / blob metadata which is writable.
1823
1824 This is intended to be used when creating a new object / blob.
1825
1826 See the [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects)
1827 for more information, the fields marked as writable are:
1828
1829 * ``acl``
1830 * ``cacheControl``
1831 * ``contentDisposition``
1832 * ``contentEncoding``
1833 * ``contentLanguage``
1834 * ``contentType``
1835 * ``crc32c``
1836 * ``customTime``
1837 * ``md5Hash``
1838 * ``metadata``
1839 * ``name``
1840 * ``retention``
1841 * ``storageClass``
1842
1843 For now, we don't support ``acl``, access control lists should be
1844 managed directly through :class:`ObjectACL` methods.
1845 """
1846 # NOTE: This assumes `self.name` is unicode.
1847 object_metadata = {"name": self.name}
1848 for key in self._changes:
1849 if key in _WRITABLE_FIELDS:
1850 object_metadata[key] = self._properties[key]
1851
1852 return object_metadata
1853
1854 def _get_upload_arguments(self, client, content_type, filename=None, command=None):
1855 """Get required arguments for performing an upload.
1856
1857 The content type returned will be determined in order of precedence:
1858
1859 - The value passed in to this method (if not :data:`None`)
1860 - The value stored on the current blob
1861 - The default value ('application/octet-stream')
1862
1863 :type content_type: str
1864 :param content_type: Type of content being uploaded (or :data:`None`).
1865
1866 :type command: str
1867 :param command:
1868 (Optional) Information about which interface for upload was used,
1869 to be included in the X-Goog-API-Client header. Please leave as None
1870 unless otherwise directed.
1871
1872 :rtype: tuple
1873 :returns: A triple of
1874
1875 * A header dictionary
1876 * An object metadata dictionary
1877 * The ``content_type`` as a string (according to precedence)
1878 """
1879 content_type = self._get_content_type(content_type, filename=filename)
1880 # Add any client attached custom headers to the upload headers.
1881 headers = {
1882 **_get_default_headers(
1883 client._connection.user_agent, content_type, command=command
1884 ),
1885 **_get_encryption_headers(self._encryption_key),
1886 **client._extra_headers,
1887 }
1888 object_metadata = self._get_writable_metadata()
1889 return headers, object_metadata, content_type
1890
1891 def _do_multipart_upload(
1892 self,
1893 client,
1894 stream,
1895 content_type,
1896 size,
1897 predefined_acl,
1898 if_generation_match,
1899 if_generation_not_match,
1900 if_metageneration_match,
1901 if_metageneration_not_match,
1902 timeout=_DEFAULT_TIMEOUT,
1903 checksum="auto",
1904 retry=None,
1905 command=None,
1906 ):
1907 """Perform a multipart upload.
1908
1909 The content type of the upload will be determined in order
1910 of precedence:
1911
1912 - The value passed in to this method (if not :data:`None`)
1913 - The value stored on the current blob
1914 - The default value ('application/octet-stream')
1915
1916 :type client: :class:`~google.cloud.storage.client.Client`
1917 :param client:
1918 (Optional) The client to use. If not passed, falls back to the
1919 ``client`` stored on the blob's bucket.
1920
1921 :type stream: IO[bytes]
1922 :param stream: A bytes IO object open for reading.
1923
1924 :type content_type: str
1925 :param content_type: Type of content being uploaded (or :data:`None`).
1926
1927 :type size: int
1928 :param size:
1929 The number of bytes to be uploaded (which will be read from
1930 ``stream``). If not provided, the upload will be concluded once
1931 ``stream`` is exhausted (or :data:`None`).
1932
1933 :type predefined_acl: str
1934 :param predefined_acl: (Optional) Predefined access control list
1935
1936 :type if_generation_match: long
1937 :param if_generation_match:
1938 (Optional) See :ref:`using-if-generation-match`
1939
1940 :type if_generation_not_match: long
1941 :param if_generation_not_match:
1942 (Optional) See :ref:`using-if-generation-not-match`
1943
1944 :type if_metageneration_match: long
1945 :param if_metageneration_match:
1946 (Optional) See :ref:`using-if-metageneration-match`
1947
1948 :type if_metageneration_not_match: long
1949 :param if_metageneration_not_match:
1950 (Optional) See :ref:`using-if-metageneration-not-match`
1951
1952 :type timeout: float or tuple
1953 :param timeout:
1954 (Optional) The amount of time, in seconds, to wait
1955 for the server response. See: :ref:`configuring_timeouts`
1956
1957 :type checksum: str
1958 :param checksum:
1959 (Optional) The type of checksum to compute to verify
1960 the integrity of the object. The request metadata will be amended
1961 to include the computed value. Using this option will override a
1962 manually-set checksum value. Supported values are "md5", "crc32c",
1963 "auto" and None. The default is "auto", which will try to detect if
1964 the C extension for crc32c is installed and fall back to md5
1965 otherwise.
1966 :type retry: google.api_core.retry.Retry
1967 :param retry: (Optional) How to retry the RPC. A None value will disable
1968 retries. A google.api_core.retry.Retry value will enable retries,
1969 and the object will configure backoff and timeout options.
1970
1971 This private method does not accept ConditionalRetryPolicy values
1972 because the information necessary to evaluate the policy is instead
1973 evaluated in blob._do_upload().
1974
1975 See the retry.py source code and docstrings in this package
1976 (google.cloud.storage.retry) for information on retry types and how
1977 to configure them.
1978
1979 :type command: str
1980 :param command:
1981 (Optional) Information about which interface for upload was used,
1982 to be included in the X-Goog-API-Client header. Please leave as None
1983 unless otherwise directed.
1984
1985 :rtype: :class:`~requests.Response`
1986 :returns: The "200 OK" response object returned after the multipart
1987 upload request.
1988 :raises: :exc:`ValueError` if ``size`` is not :data:`None` but the
1989 ``stream`` has fewer than ``size`` bytes remaining.
1990 """
1991 if size is None:
1992 data = stream.read()
1993 else:
1994 data = stream.read(size)
1995 if len(data) < size:
1996 msg = _READ_LESS_THAN_SIZE.format(size, len(data))
1997 raise ValueError(msg)
1998
1999 client = self._require_client(client)
2000 transport = self._get_transport(client)
2001 if "metadata" in self._properties and "metadata" not in self._changes:
2002 self._changes.add("metadata")
2003
2004 info = self._get_upload_arguments(client, content_type, command=command)
2005 headers, object_metadata, content_type = info
2006
2007 hostname = _get_host_name(client._connection)
2008 base_url = _MULTIPART_URL_TEMPLATE.format(
2009 hostname=hostname,
2010 bucket_path=self.bucket.path,
2011 api_version=_API_VERSION,
2012 )
2013 name_value_pairs = []
2014
2015 if self.user_project is not None:
2016 name_value_pairs.append(("userProject", self.user_project))
2017
2018 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
2019 # at rest, object resource metadata will store the version of the Key Management
2020 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
2021 # used to upload a new version of the object then the existing kmsKeyName version
2022 # value can't be used in the upload request and the client instead ignores it.
2023 if (
2024 self.kms_key_name is not None
2025 and "cryptoKeyVersions" not in self.kms_key_name
2026 ):
2027 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
2028
2029 if predefined_acl is not None:
2030 name_value_pairs.append(("predefinedAcl", predefined_acl))
2031
2032 if if_generation_match is not None:
2033 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
2034
2035 if if_generation_not_match is not None:
2036 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
2037
2038 if if_metageneration_match is not None:
2039 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
2040
2041 if if_metageneration_not_match is not None:
2042 name_value_pairs.append(
2043 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
2044 )
2045
2046 upload_url = _add_query_parameters(base_url, name_value_pairs)
2047 upload = MultipartUpload(
2048 upload_url, headers=headers, checksum=checksum, retry=retry
2049 )
2050
2051 extra_attributes = {
2052 "url.full": upload_url,
2053 "upload.checksum": f"{checksum}",
2054 }
2055 args = {"timeout": timeout}
2056 with create_trace_span(
2057 name="Storage.MultipartUpload/transmit",
2058 attributes=extra_attributes,
2059 client=client,
2060 api_request=args,
2061 ):
2062 response = upload.transmit(
2063 transport, data, object_metadata, content_type, timeout=timeout
2064 )
2065
2066 return response
2067
2068 def _initiate_resumable_upload(
2069 self,
2070 client,
2071 stream,
2072 content_type,
2073 size,
2074 predefined_acl=None,
2075 extra_headers=None,
2076 chunk_size=None,
2077 if_generation_match=None,
2078 if_generation_not_match=None,
2079 if_metageneration_match=None,
2080 if_metageneration_not_match=None,
2081 timeout=_DEFAULT_TIMEOUT,
2082 checksum="auto",
2083 retry=None,
2084 command=None,
2085 crc32c_checksum_value=None,
2086 ):
2087 """Initiate a resumable upload.
2088
2089 The content type of the upload will be determined in order
2090 of precedence:
2091
2092 - The value passed in to this method (if not :data:`None`)
2093 - The value stored on the current blob
2094 - The default value ('application/octet-stream')
2095
2096 :type client: :class:`~google.cloud.storage.client.Client`
2097 :param client:
2098 (Optional) The client to use. If not passed, falls back to the
2099 ``client`` stored on the blob's bucket.
2100
2101 :type stream: IO[bytes]
2102 :param stream: A bytes IO object open for reading.
2103
2104 :type content_type: str
2105 :param content_type: Type of content being uploaded (or :data:`None`).
2106
2107 :type size: int
2108 :param size:
2109 The number of bytes to be uploaded (which will be read from
2110 ``stream``). If not provided, the upload will be concluded once
2111 ``stream`` is exhausted (or :data:`None`).
2112
2113 :type predefined_acl: str
2114 :param predefined_acl: (Optional) Predefined access control list
2115
2116 :type extra_headers: dict
2117 :param extra_headers:
2118 (Optional) Extra headers to add to standard headers.
2119
2120 :type chunk_size: int
2121 :param chunk_size:
2122 (Optional) Chunk size to use when creating a
2123 :class:`~google.cloud.storage._media.requests.ResumableUpload`.
2124 If not passed, will fall back to the chunk size on the
2125 current blob, if the chunk size of a current blob is also
2126 `None`, will set the default value.
2127 The default value of ``chunk_size`` is 100 MB.
2128
2129 :type if_generation_match: long
2130 :param if_generation_match:
2131 (Optional) See :ref:`using-if-generation-match`
2132
2133 :type if_generation_not_match: long
2134 :param if_generation_not_match:
2135 (Optional) See :ref:`using-if-generation-not-match`
2136
2137 :type if_metageneration_match: long
2138 :param if_metageneration_match:
2139 (Optional) See :ref:`using-if-metageneration-match`
2140
2141 :type if_metageneration_not_match: long
2142 :param if_metageneration_not_match:
2143 (Optional) See :ref:`using-if-metageneration-not-match`
2144
2145 :type timeout: float or tuple
2146 :param timeout:
2147 (Optional) The amount of time, in seconds, to wait
2148 for the server response. See: :ref:`configuring_timeouts`
2149
2150 :type checksum: str
2151 :param checksum:
2152 (Optional) The type of checksum to compute to verify
2153 the integrity of the object. After the upload is complete, the
2154 server-computed checksum of the resulting object will be checked
2155 and google.cloud.storage.exceptions.DataCorruption will be raised on
2156 a mismatch. On a validation failure, the client will attempt to
2157 delete the uploaded object automatically. Supported values are
2158 "md5", "crc32c", "auto" and None. The default is "auto", which will
2159 try to detect if the C extension for crc32c is installed and fall
2160 back to md5 otherwise.
2161
2162 :type retry: google.api_core.retry.Retry
2163 :param retry: (Optional) How to retry the RPC. A None value will disable
2164 retries. A google.api_core.retry.Retry value will enable retries,
2165 and the object will configure backoff and timeout options.
2166
2167 This private method does not accept ConditionalRetryPolicy values
2168 because the information necessary to evaluate the policy is instead
2169 evaluated in blob._do_upload().
2170
2171 See the retry.py source code and docstrings in this package
2172 (google.cloud.storage.retry) for information on retry types and how
2173 to configure them.
2174
2175 :type command: str
2176 :param command:
2177 (Optional) Information about which interface for upload was used,
2178 to be included in the X-Goog-API-Client header. Please leave as None
2179 unless otherwise directed.
2180
2181 :type crc32c_checksum_value: str
2182 :param crc32c_checksum_value: (Optional) This should be the checksum of
2183 the entire contents of `file`. Applicable while uploading object
2184 greater than `_MAX_MULTIPART_SIZE` bytes.
2185
2186 It can be obtained by running
2187
2188 `gcloud storage hash /path/to/your/file`
2189
2190 or
2191
2192 .. code-block:: python
2193
2194 import google_crc32c
2195 import base64
2196
2197 data = b"Hello, world!"
2198 crc32c_int = google_crc32c.value(data)
2199 crc32c_hex = f"{crc32c_int:08x}"
2200 crc32c_bytes = crc32c_int.to_bytes(4, "big")
2201 base64_encoded = base64.b64encode(crc32c_bytes)
2202 crc32c_base64 = base64_encoded.decode("utf-8")
2203
2204 print(crc32c_base64)
2205
2206 Above code block prints 8 char string of base64 encoded big-endian
2207 bytes of 32 bit CRC32c integer.
2208
2209 More details on CRC32c can be found in Appendix B:
2210 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
2211 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
2212
2213 :rtype: tuple
2214 :returns:
2215 Pair of
2216
2217 * The :class:`~google.cloud.storage._media.requests.ResumableUpload`
2218 that was created
2219 * The ``transport`` used to initiate the upload.
2220 """
2221 client = self._require_client(client)
2222 if chunk_size is None:
2223 chunk_size = self.chunk_size
2224 if chunk_size is None:
2225 chunk_size = _DEFAULT_CHUNKSIZE
2226
2227 transport = self._get_transport(client)
2228 if "metadata" in self._properties and "metadata" not in self._changes:
2229 self._changes.add("metadata")
2230 info = self._get_upload_arguments(client, content_type, command=command)
2231 headers, object_metadata, content_type = info
2232 if extra_headers is not None:
2233 headers.update(extra_headers)
2234
2235 if crc32c_checksum_value is not None:
2236 object_metadata["crc32c"] = crc32c_checksum_value
2237
2238 hostname = _get_host_name(client._connection)
2239 base_url = _RESUMABLE_URL_TEMPLATE.format(
2240 hostname=hostname,
2241 bucket_path=self.bucket.path,
2242 api_version=_API_VERSION,
2243 )
2244 name_value_pairs = []
2245
2246 if self.user_project is not None:
2247 name_value_pairs.append(("userProject", self.user_project))
2248
2249 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
2250 # at rest, object resource metadata will store the version of the Key Management
2251 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
2252 # used to upload a new version of the object then the existing kmsKeyName version
2253 # value can't be used in the upload request and the client instead ignores it.
2254 if (
2255 self.kms_key_name is not None
2256 and "cryptoKeyVersions" not in self.kms_key_name
2257 ):
2258 name_value_pairs.append(("kmsKeyName", self.kms_key_name))
2259
2260 if predefined_acl is not None:
2261 name_value_pairs.append(("predefinedAcl", predefined_acl))
2262
2263 if if_generation_match is not None:
2264 name_value_pairs.append(("ifGenerationMatch", if_generation_match))
2265
2266 if if_generation_not_match is not None:
2267 name_value_pairs.append(("ifGenerationNotMatch", if_generation_not_match))
2268
2269 if if_metageneration_match is not None:
2270 name_value_pairs.append(("ifMetagenerationMatch", if_metageneration_match))
2271
2272 if if_metageneration_not_match is not None:
2273 name_value_pairs.append(
2274 ("ifMetaGenerationNotMatch", if_metageneration_not_match)
2275 )
2276
2277 upload_url = _add_query_parameters(base_url, name_value_pairs)
2278 upload = ResumableUpload(
2279 upload_url,
2280 chunk_size,
2281 headers=headers,
2282 checksum=checksum,
2283 retry=retry,
2284 )
2285
2286 upload.initiate(
2287 transport,
2288 stream,
2289 object_metadata,
2290 content_type,
2291 total_bytes=size,
2292 stream_final=False,
2293 timeout=timeout,
2294 )
2295
2296 return upload, transport
2297
2298 def _do_resumable_upload(
2299 self,
2300 client,
2301 stream,
2302 content_type,
2303 size,
2304 predefined_acl,
2305 if_generation_match,
2306 if_generation_not_match,
2307 if_metageneration_match,
2308 if_metageneration_not_match,
2309 timeout=_DEFAULT_TIMEOUT,
2310 checksum="auto",
2311 retry=None,
2312 command=None,
2313 crc32c_checksum_value=None,
2314 ):
2315 """Perform a resumable upload.
2316
2317 Assumes ``chunk_size`` is not :data:`None` on the current blob.
2318 The default value of ``chunk_size`` is 100 MB.
2319
2320 The content type of the upload will be determined in order
2321 of precedence:
2322
2323 - The value passed in to this method (if not :data:`None`)
2324 - The value stored on the current blob
2325 - The default value ('application/octet-stream')
2326
2327 :type client: :class:`~google.cloud.storage.client.Client`
2328 :param client:
2329 (Optional) The client to use. If not passed, falls back to the
2330 ``client`` stored on the blob's bucket.
2331
2332 :type stream: IO[bytes]
2333 :param stream: A bytes IO object open for reading.
2334
2335 :type content_type: str
2336 :param content_type: Type of content being uploaded (or :data:`None`).
2337
2338 :type size: int
2339 :param size:
2340 The number of bytes to be uploaded (which will be read from
2341 ``stream``). If not provided, the upload will be concluded once
2342 ``stream`` is exhausted (or :data:`None`).
2343
2344 :type predefined_acl: str
2345 :param predefined_acl: (Optional) Predefined access control list
2346
2347 :type if_generation_match: long
2348 :param if_generation_match:
2349 (Optional) See :ref:`using-if-generation-match`
2350
2351 :type if_generation_not_match: long
2352 :param if_generation_not_match:
2353 (Optional) See :ref:`using-if-generation-not-match`
2354
2355 :type if_metageneration_match: long
2356 :param if_metageneration_match:
2357 (Optional) See :ref:`using-if-metageneration-match`
2358
2359 :type if_metageneration_not_match: long
2360 :param if_metageneration_not_match:
2361 (Optional) See :ref:`using-if-metageneration-not-match`
2362
2363 :type timeout: float or tuple
2364 :param timeout:
2365 (Optional) The amount of time, in seconds, to wait
2366 for the server response. See: :ref:`configuring_timeouts`
2367
2368 :type checksum: str
2369 :param checksum:
2370 (Optional) The type of checksum to compute to verify
2371 the integrity of the object. After the upload is complete, the
2372 server-computed checksum of the resulting object will be checked
2373 and google.cloud.storage.exceptions.DataCorruption will be raised on
2374 a mismatch. On a validation failure, the client will attempt to
2375 delete the uploaded object automatically. Supported values are
2376 "md5", "crc32c", "auto" and None. The default is "auto", which will
2377 try to detect if the C extension for crc32c is installed and fall
2378 back to md5 otherwise.
2379
2380 :type retry: google.api_core.retry.Retry
2381 :param retry: (Optional) How to retry the RPC. A None value will disable
2382 retries. A google.api_core.retry.Retry value will enable retries,
2383 and the object will configure backoff and timeout options.
2384
2385 This private method does not accept ConditionalRetryPolicy values
2386 because the information necessary to evaluate the policy is instead
2387 evaluated in blob._do_upload().
2388
2389 See the retry.py source code and docstrings in this package
2390 (google.cloud.storage.retry) for information on retry types and how
2391 to configure them.
2392
2393 :type command: str
2394 :param command:
2395 (Optional) Information about which interface for upload was used,
2396 to be included in the X-Goog-API-Client header. Please leave as None
2397 unless otherwise directed.
2398
2399 :type crc32c_checksum_value: str
2400 :param crc32c_checksum_value: (Optional) This should be the checksum of
2401 the entire contents of `stream`. Applicable while uploading object
2402 greater than `_MAX_MULTIPART_SIZE` bytes.
2403
2404 It can be obtained by running
2405
2406 `gcloud storage hash /path/to/your/file`
2407
2408 or
2409
2410 .. code-block:: python
2411
2412 import google_crc32c
2413 import base64
2414
2415 data = b"Hello, world!"
2416 crc32c_int = google_crc32c.value(data)
2417 crc32c_hex = f"{crc32c_int:08x}"
2418 crc32c_bytes = crc32c_int.to_bytes(4, "big")
2419 base64_encoded = base64.b64encode(crc32c_bytes)
2420 crc32c_base64 = base64_encoded.decode("utf-8")
2421
2422 print(crc32c_base64)
2423
2424 Above code block prints 8 char string of base64 encoded big-endian
2425 bytes of 32 bit CRC32c integer.
2426
2427 More details on CRC32c can be found in Appendix B:
2428 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
2429 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
2430
2431 :rtype: :class:`~requests.Response`
2432 :returns: The "200 OK" response object returned after the final chunk
2433 is uploaded.
2434 """
2435 upload, transport = self._initiate_resumable_upload(
2436 client,
2437 stream,
2438 content_type,
2439 size,
2440 predefined_acl=predefined_acl,
2441 if_generation_match=if_generation_match,
2442 if_generation_not_match=if_generation_not_match,
2443 if_metageneration_match=if_metageneration_match,
2444 if_metageneration_not_match=if_metageneration_not_match,
2445 timeout=timeout,
2446 checksum=checksum,
2447 retry=retry,
2448 command=command,
2449 crc32c_checksum_value=crc32c_checksum_value,
2450 )
2451 extra_attributes = {
2452 "url.full": upload.resumable_url,
2453 "upload.chunk_size": upload.chunk_size,
2454 "upload.checksum": f"{checksum}",
2455 }
2456 args = {"timeout": timeout}
2457 with create_trace_span(
2458 name="Storage.ResumableUpload/transmitNextChunk",
2459 attributes=extra_attributes,
2460 client=client,
2461 api_request=args,
2462 ):
2463 while not upload.finished:
2464 try:
2465 response = upload.transmit_next_chunk(transport, timeout=timeout)
2466 except DataCorruption:
2467 # Attempt to delete the corrupted object.
2468 self.delete()
2469 raise
2470 return response
2471
2472 def _do_upload(
2473 self,
2474 client,
2475 stream,
2476 content_type,
2477 size,
2478 predefined_acl,
2479 if_generation_match,
2480 if_generation_not_match,
2481 if_metageneration_match,
2482 if_metageneration_not_match,
2483 timeout=_DEFAULT_TIMEOUT,
2484 checksum="auto",
2485 retry=None,
2486 command=None,
2487 crc32c_checksum_value=None,
2488 ):
2489 """Determine an upload strategy and then perform the upload.
2490
2491 If the size of the data to be uploaded exceeds 8 MB a resumable media
2492 request will be used, otherwise the content and the metadata will be
2493 uploaded in a single multipart upload request.
2494
2495 The content type of the upload will be determined in order
2496 of precedence:
2497
2498 - The value passed in to this method (if not :data:`None`)
2499 - The value stored on the current blob
2500 - The default value ('application/octet-stream')
2501
2502 :type client: :class:`~google.cloud.storage.client.Client`
2503 :param client:
2504 (Optional) The client to use. If not passed, falls back to the
2505 ``client`` stored on the blob's bucket.
2506
2507 :type stream: IO[bytes]
2508 :param stream: A bytes IO object open for reading.
2509
2510 :type content_type: str
2511 :param content_type: Type of content being uploaded (or :data:`None`).
2512
2513 :type size: int
2514 :param size:
2515 The number of bytes to be uploaded (which will be read from
2516 ``stream``). If not provided, the upload will be concluded once
2517 ``stream`` is exhausted (or :data:`None`).
2518
2519 :type predefined_acl: str
2520 :param predefined_acl: (Optional) Predefined access control list
2521
2522 :type if_generation_match: long
2523 :param if_generation_match:
2524 (Optional) See :ref:`using-if-generation-match`
2525
2526 :type if_generation_not_match: long
2527 :param if_generation_not_match:
2528 (Optional) See :ref:`using-if-generation-not-match`
2529
2530 :type if_metageneration_match: long
2531 :param if_metageneration_match:
2532 (Optional) See :ref:`using-if-metageneration-match`
2533
2534 :type if_metageneration_not_match: long
2535 :param if_metageneration_not_match:
2536 (Optional) See :ref:`using-if-metageneration-not-match`
2537
2538 :type timeout: float or tuple
2539 :param timeout:
2540 (Optional) The amount of time, in seconds, to wait
2541 for the server response. See: :ref:`configuring_timeouts`
2542
2543 :type checksum: str
2544 :param checksum:
2545 (Optional) The type of checksum to compute to verify
2546 the integrity of the object. If the upload is completed in a single
2547 request, the checksum will be entirely precomputed and the remote
2548 server will handle verification and error handling. If the upload
2549 is too large and must be transmitted in multiple requests, the
2550 checksum will be incrementally computed and the client will handle
2551 verification and error handling, raising
2552 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2553 attempting to delete the corrupted file. Supported values are
2554 "md5", "crc32c", "auto" and None. The default is "auto", which will
2555 try to detect if the C extension for crc32c is installed and fall
2556 back to md5 otherwise.
2557
2558 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2559 :param retry: (Optional) How to retry the RPC. A None value will disable
2560 retries. A google.api_core.retry.Retry value will enable retries,
2561 and the object will define retriable response codes and errors and
2562 configure backoff and timeout options.
2563
2564 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2565 Retry object and activates it only if certain conditions are met.
2566 This class exists to provide safe defaults for RPC calls that are
2567 not technically safe to retry normally (due to potential data
2568 duplication or other side-effects) but become safe to retry if a
2569 condition such as if_generation_match is set.
2570
2571 See the retry.py source code and docstrings in this package
2572 (google.cloud.storage.retry) for information on retry types and how
2573 to configure them.
2574
2575 :type command: str
2576 :param command:
2577 (Optional) Information about which interface for upload was used,
2578 to be included in the X-Goog-API-Client header. Please leave as None
2579 unless otherwise directed.
2580
2581 :type crc32c_checksum_value: str
2582 :param crc32c_checksum_value: (Optional) This should be the checksum of
2583 the entire contents of `file_obj`. Applicable while uploading object
2584 greater than `_MAX_MULTIPART_SIZE` bytes.
2585
2586 It can be obtained by running
2587
2588 `gcloud storage hash /path/to/your/file`
2589
2590 or
2591
2592 .. code-block:: python
2593
2594 import google_crc32c
2595 import base64
2596
2597 data = b"Hello, world!"
2598 crc32c_int = google_crc32c.value(data)
2599 crc32c_hex = f"{crc32c_int:08x}"
2600 crc32c_bytes = crc32c_int.to_bytes(4, "big")
2601 base64_encoded = base64.b64encode(crc32c_bytes)
2602 crc32c_base64 = base64_encoded.decode("utf-8")
2603
2604 print(crc32c_base64)
2605
2606 Above code block prints 8 char string of base64 encoded big-endian
2607 bytes of 32 bit CRC32c integer.
2608
2609 More details on CRC32c can be found in Appendix B:
2610 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
2611 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
2612
2613 :rtype: dict
2614 :returns: The parsed JSON from the "200 OK" response. This will be the
2615 **only** response in the multipart case and it will be the
2616 **final** response in the resumable case.
2617 """
2618
2619 # Handle ConditionalRetryPolicy.
2620 if isinstance(retry, ConditionalRetryPolicy):
2621 # Conditional retries are designed for non-media calls, which change
2622 # arguments into query_params dictionaries. Media operations work
2623 # differently, so here we make a "fake" query_params to feed to the
2624 # ConditionalRetryPolicy.
2625 query_params = {
2626 "ifGenerationMatch": if_generation_match,
2627 "ifMetagenerationMatch": if_metageneration_match,
2628 }
2629 retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
2630
2631 if size is not None and size <= _MAX_MULTIPART_SIZE:
2632 response = self._do_multipart_upload(
2633 client,
2634 stream,
2635 content_type,
2636 size,
2637 predefined_acl,
2638 if_generation_match,
2639 if_generation_not_match,
2640 if_metageneration_match,
2641 if_metageneration_not_match,
2642 timeout=timeout,
2643 checksum=checksum,
2644 retry=retry,
2645 command=command,
2646 )
2647 else:
2648 response = self._do_resumable_upload(
2649 client,
2650 stream,
2651 content_type,
2652 size,
2653 predefined_acl,
2654 if_generation_match,
2655 if_generation_not_match,
2656 if_metageneration_match,
2657 if_metageneration_not_match,
2658 timeout=timeout,
2659 checksum=checksum,
2660 retry=retry,
2661 command=command,
2662 crc32c_checksum_value=crc32c_checksum_value,
2663 )
2664
2665 return response.json()
2666
2667 def _prep_and_do_upload(
2668 self,
2669 file_obj,
2670 rewind=False,
2671 size=None,
2672 content_type=None,
2673 client=None,
2674 predefined_acl=None,
2675 if_generation_match=None,
2676 if_generation_not_match=None,
2677 if_metageneration_match=None,
2678 if_metageneration_not_match=None,
2679 timeout=_DEFAULT_TIMEOUT,
2680 checksum="auto",
2681 retry=DEFAULT_RETRY,
2682 command=None,
2683 crc32c_checksum_value=None,
2684 ):
2685 """Upload the contents of this blob from a file-like object.
2686
2687 The content type of the upload will be determined in order
2688 of precedence:
2689
2690 - The value passed in to this method (if not :data:`None`)
2691 - The value stored on the current blob
2692 - The default value ('application/octet-stream')
2693
2694 .. note::
2695 The effect of uploading to an existing blob depends on the
2696 "versioning" and "lifecycle" policies defined on the blob's
2697 bucket. In the absence of those policies, upload will
2698 overwrite any existing contents.
2699
2700 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2701 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2702 API documents for details.
2703
2704 If the size of the data to be uploaded exceeds 8 MB a resumable media
2705 request will be used, otherwise the content and the metadata will be
2706 uploaded in a single multipart upload request.
2707
2708 For more fine-grained over the upload process, check out
2709 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
2710
2711 If :attr:`user_project` is set on the bucket, bills the API request
2712 to that project.
2713
2714 :type file_obj: file
2715 :param file_obj: A file handle opened in binary mode for reading.
2716
2717 :type rewind: bool
2718 :param rewind:
2719 If True, seek to the beginning of the file handle before writing
2720 the file to Cloud Storage.
2721
2722 :type size: int
2723 :param size:
2724 The number of bytes to be uploaded (which will be read from
2725 ``file_obj``). If not provided, the upload will be concluded once
2726 ``file_obj`` is exhausted.
2727
2728 :type content_type: str
2729 :param content_type: (Optional) Type of content being uploaded.
2730
2731 :type client: :class:`~google.cloud.storage.client.Client`
2732 :param client:
2733 (Optional) The client to use. If not passed, falls back to the
2734 ``client`` stored on the blob's bucket.
2735
2736 :type predefined_acl: str
2737 :param predefined_acl: (Optional) Predefined access control list
2738
2739 :type if_generation_match: long
2740 :param if_generation_match:
2741 (Optional) See :ref:`using-if-generation-match`
2742
2743 :type if_generation_not_match: long
2744 :param if_generation_not_match:
2745 (Optional) See :ref:`using-if-generation-not-match`
2746
2747 :type if_metageneration_match: long
2748 :param if_metageneration_match:
2749 (Optional) See :ref:`using-if-metageneration-match`
2750
2751 :type if_metageneration_not_match: long
2752 :param if_metageneration_not_match:
2753 (Optional) See :ref:`using-if-metageneration-not-match`
2754
2755 :type timeout: float or tuple
2756 :param timeout:
2757 (Optional) The amount of time, in seconds, to wait
2758 for the server response. See: :ref:`configuring_timeouts`
2759
2760 :type checksum: str
2761 :param checksum:
2762 (Optional) The type of checksum to compute to verify
2763 the integrity of the object. If the upload is completed in a single
2764 request, the checksum will be entirely precomputed and the remote
2765 server will handle verification and error handling. If the upload
2766 is too large and must be transmitted in multiple requests, the
2767 checksum will be incrementally computed and the client will handle
2768 verification and error handling, raising
2769 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2770 attempting to delete the corrupted file. Supported values are
2771 "md5", "crc32c", "auto" and None. The default is "auto", which will
2772 try to detect if the C extension for crc32c is installed and fall
2773 back to md5 otherwise.
2774
2775 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2776 :param retry: (Optional) How to retry the RPC. A None value will disable
2777 retries. A google.api_core.retry.Retry value will enable retries,
2778 and the object will define retriable response codes and errors and
2779 configure backoff and timeout options.
2780
2781 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2782 Retry object and activates it only if certain conditions are met.
2783 This class exists to provide safe defaults for RPC calls that are
2784 not technically safe to retry normally (due to potential data
2785 duplication or other side-effects) but become safe to retry if a
2786 condition such as if_generation_match is set.
2787
2788 See the retry.py source code and docstrings in this package
2789 (google.cloud.storage.retry) for information on retry types and how
2790 to configure them.
2791
2792 :type command: str
2793 :param command:
2794 (Optional) Information about which interface for upload was used,
2795 to be included in the X-Goog-API-Client header. Please leave as None
2796 unless otherwise directed.
2797
2798 :type crc32c_checksum_value: str
2799 :param crc32c_checksum_value: (Optional) This should be the checksum of
2800 the entire contents of `file_obj`. Applicable while uploading object
2801 greater than `_MAX_MULTIPART_SIZE` bytes.
2802
2803 It can be obtained by running
2804
2805 `gcloud storage hash /path/to/your/file`
2806
2807 or
2808
2809 .. code-block:: python
2810
2811 import google_crc32c
2812 import base64
2813
2814 data = b"Hello, world!"
2815 crc32c_int = google_crc32c.value(data)
2816 crc32c_hex = f"{crc32c_int:08x}"
2817 crc32c_bytes = crc32c_int.to_bytes(4, "big")
2818 base64_encoded = base64.b64encode(crc32c_bytes)
2819 crc32c_base64 = base64_encoded.decode("utf-8")
2820
2821 print(crc32c_base64)
2822
2823 Above code block prints 8 char string of base64 encoded big-endian
2824 bytes of 32 bit CRC32c integer.
2825
2826 More details on CRC32c can be found in Appendix B:
2827 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
2828 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
2829
2830 :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
2831 if the upload response returns an error status.
2832 """
2833 _maybe_rewind(file_obj, rewind=rewind)
2834 predefined_acl = ACL.validate_predefined(predefined_acl)
2835
2836 try:
2837 created_json = self._do_upload(
2838 client,
2839 file_obj,
2840 content_type,
2841 size,
2842 predefined_acl,
2843 if_generation_match,
2844 if_generation_not_match,
2845 if_metageneration_match,
2846 if_metageneration_not_match,
2847 timeout=timeout,
2848 checksum=checksum,
2849 retry=retry,
2850 command=command,
2851 crc32c_checksum_value=crc32c_checksum_value,
2852 )
2853 self._set_properties(created_json)
2854 except InvalidResponse as exc:
2855 _raise_from_invalid_response(exc)
2856
2857 def upload_from_file(
2858 self,
2859 file_obj,
2860 rewind=False,
2861 size=None,
2862 content_type=None,
2863 client=None,
2864 predefined_acl=None,
2865 if_generation_match=None,
2866 if_generation_not_match=None,
2867 if_metageneration_match=None,
2868 if_metageneration_not_match=None,
2869 timeout=_DEFAULT_TIMEOUT,
2870 checksum="auto",
2871 retry=DEFAULT_RETRY,
2872 crc32c_checksum_value=None,
2873 ):
2874 """Upload the contents of this blob from a file-like object.
2875
2876 The content type of the upload will be determined in order
2877 of precedence:
2878
2879 - The value passed in to this method (if not :data:`None`)
2880 - The value stored on the current blob
2881 - The default value ('application/octet-stream')
2882
2883 .. note::
2884 The effect of uploading to an existing blob depends on the
2885 "versioning" and "lifecycle" policies defined on the blob's
2886 bucket. In the absence of those policies, upload will
2887 overwrite any existing contents.
2888
2889 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
2890 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
2891 API documents for details.
2892
2893 If the size of the data to be uploaded exceeds 8 MB a resumable media
2894 request will be used, otherwise the content and the metadata will be
2895 uploaded in a single multipart upload request.
2896
2897 For more fine-grained over the upload process, check out
2898 [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html).
2899
2900 If :attr:`user_project` is set on the bucket, bills the API request
2901 to that project.
2902
2903 :type file_obj: file
2904 :param file_obj: A file handle opened in binary mode for reading.
2905
2906 :type rewind: bool
2907 :param rewind:
2908 If True, seek to the beginning of the file handle before writing
2909 the file to Cloud Storage.
2910
2911 :type size: int
2912 :param size:
2913 The number of bytes to be uploaded (which will be read from
2914 ``file_obj``). If not provided, the upload will be concluded once
2915 ``file_obj`` is exhausted.
2916
2917 :type content_type: str
2918 :param content_type: (Optional) Type of content being uploaded.
2919
2920 :type client: :class:`~google.cloud.storage.client.Client`
2921 :param client:
2922 (Optional) The client to use. If not passed, falls back to the
2923 ``client`` stored on the blob's bucket.
2924
2925 :type predefined_acl: str
2926 :param predefined_acl: (Optional) Predefined access control list
2927
2928 :type if_generation_match: long
2929 :param if_generation_match:
2930 (Optional) See :ref:`using-if-generation-match`
2931
2932 :type if_generation_not_match: long
2933 :param if_generation_not_match:
2934 (Optional) See :ref:`using-if-generation-not-match`
2935
2936 :type if_metageneration_match: long
2937 :param if_metageneration_match:
2938 (Optional) See :ref:`using-if-metageneration-match`
2939
2940 :type if_metageneration_not_match: long
2941 :param if_metageneration_not_match:
2942 (Optional) See :ref:`using-if-metageneration-not-match`
2943
2944 :type timeout: float or tuple
2945 :param timeout:
2946 (Optional) The amount of time, in seconds, to wait
2947 for the server response. See: :ref:`configuring_timeouts`
2948
2949 :type checksum: str
2950 :param checksum:
2951 (Optional) The type of checksum to compute to verify
2952 the integrity of the object. If the upload is completed in a single
2953 request, the checksum will be entirely precomputed and the remote
2954 server will handle verification and error handling. If the upload
2955 is too large and must be transmitted in multiple requests, the
2956 checksum will be incrementally computed and the client will handle
2957 verification and error handling, raising
2958 google.cloud.storage.exceptions.DataCorruption on a mismatch and
2959 attempting to delete the corrupted file. Supported values are
2960 "md5", "crc32c", "auto" and None. The default is "auto", which will
2961 try to detect if the C extension for crc32c is installed and fall
2962 back to md5 otherwise.
2963
2964 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
2965 :param retry: (Optional) How to retry the RPC. A None value will disable
2966 retries. A google.api_core.retry.Retry value will enable retries,
2967 and the object will define retriable response codes and errors and
2968 configure backoff and timeout options.
2969
2970 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
2971 Retry object and activates it only if certain conditions are met.
2972 This class exists to provide safe defaults for RPC calls that are
2973 not technically safe to retry normally (due to potential data
2974 duplication or other side-effects) but become safe to retry if a
2975 condition such as if_generation_match is set.
2976
2977 See the retry.py source code and docstrings in this package
2978 (google.cloud.storage.retry) for information on retry types and how
2979 to configure them.
2980
2981 :type crc32c_checksum_value: str
2982 :param crc32c_checksum_value: (Optional) This should be the checksum of
2983 the entire contents of `file_obj`. Applicable while uploading object
2984 greater than `_MAX_MULTIPART_SIZE` bytes.
2985
2986 It can be obtained by running
2987
2988 `gcloud storage hash /path/to/your/file`
2989
2990 or
2991
2992 .. code-block:: python
2993
2994 import google_crc32c
2995 import base64
2996
2997 data = b"Hello, world!"
2998 crc32c_int = google_crc32c.value(data)
2999 crc32c_hex = f"{crc32c_int:08x}"
3000 crc32c_bytes = crc32c_int.to_bytes(4, "big")
3001 base64_encoded = base64.b64encode(crc32c_bytes)
3002 crc32c_base64 = base64_encoded.decode("utf-8")
3003
3004 print(crc32c_base64)
3005
3006 Above code block prints 8 char string of base64 encoded big-endian
3007 bytes of 32 bit CRC32c integer.
3008
3009 More details on CRC32c can be found in Appendix B:
3010 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
3011 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
3012
3013 :raises: :class:`~google.cloud.exceptions.GoogleCloudError`
3014 if the upload response returns an error status.
3015 """
3016 with create_trace_span(name="Storage.Blob.uploadFromFile"):
3017 self._prep_and_do_upload(
3018 file_obj,
3019 rewind=rewind,
3020 size=size,
3021 content_type=content_type,
3022 client=client,
3023 predefined_acl=predefined_acl,
3024 if_generation_match=if_generation_match,
3025 if_generation_not_match=if_generation_not_match,
3026 if_metageneration_match=if_metageneration_match,
3027 if_metageneration_not_match=if_metageneration_not_match,
3028 timeout=timeout,
3029 checksum=checksum,
3030 retry=retry,
3031 crc32c_checksum_value=crc32c_checksum_value,
3032 )
3033
3034 def _handle_filename_and_upload(self, filename, content_type=None, *args, **kwargs):
3035 """Upload this blob's contents from the content of a named file.
3036
3037 :type filename: str
3038 :param filename: The path to the file.
3039
3040 :type content_type: str
3041 :param content_type: (Optional) Type of content being uploaded.
3042
3043 For *args and **kwargs, refer to the documentation for upload_from_filename() for more information.
3044 """
3045
3046 content_type = self._get_content_type(content_type, filename=filename)
3047
3048 with open(filename, "rb") as file_obj:
3049 total_bytes = os.fstat(file_obj.fileno()).st_size
3050 self._prep_and_do_upload(
3051 file_obj,
3052 content_type=content_type,
3053 size=total_bytes,
3054 *args,
3055 **kwargs,
3056 )
3057
3058 def upload_from_filename(
3059 self,
3060 filename,
3061 content_type=None,
3062 client=None,
3063 predefined_acl=None,
3064 if_generation_match=None,
3065 if_generation_not_match=None,
3066 if_metageneration_match=None,
3067 if_metageneration_not_match=None,
3068 timeout=_DEFAULT_TIMEOUT,
3069 checksum="auto",
3070 retry=DEFAULT_RETRY,
3071 crc32c_checksum_value=None,
3072 ):
3073 """Upload this blob's contents from the content of a named file.
3074
3075 The content type of the upload will be determined in order
3076 of precedence:
3077
3078 - The value passed in to this method (if not :data:`None`)
3079 - The value stored on the current blob
3080 - The value given by ``mimetypes.guess_type``
3081 - The default value ('application/octet-stream')
3082
3083 .. note::
3084 The effect of uploading to an existing blob depends on the
3085 "versioning" and "lifecycle" policies defined on the blob's
3086 bucket. In the absence of those policies, upload will
3087 overwrite any existing contents.
3088
3089 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3090 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3091 API documents for details.
3092
3093 If :attr:`user_project` is set on the bucket, bills the API request
3094 to that project.
3095
3096 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-upload-encrypted-file#storage_upload_encrypted_file-python)
3097 to upload a file with a
3098 [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied).
3099
3100 :type filename: str
3101 :param filename: The path to the file.
3102
3103 :type content_type: str
3104 :param content_type: (Optional) Type of content being uploaded.
3105
3106 :type client: :class:`~google.cloud.storage.client.Client`
3107 :param client:
3108 (Optional) The client to use. If not passed, falls back to the
3109 ``client`` stored on the blob's bucket.
3110
3111 :type predefined_acl: str
3112 :param predefined_acl: (Optional) Predefined access control list
3113
3114 :type if_generation_match: long
3115 :param if_generation_match:
3116 (Optional) See :ref:`using-if-generation-match`
3117
3118 :type if_generation_not_match: long
3119 :param if_generation_not_match:
3120 (Optional) See :ref:`using-if-generation-not-match`
3121
3122 :type if_metageneration_match: long
3123 :param if_metageneration_match:
3124 (Optional) See :ref:`using-if-metageneration-match`
3125
3126 :type if_metageneration_not_match: long
3127 :param if_metageneration_not_match:
3128 (Optional) See :ref:`using-if-metageneration-not-match`
3129
3130 :type timeout: float or tuple
3131 :param timeout:
3132 (Optional) The amount of time, in seconds, to wait
3133 for the server response. See: :ref:`configuring_timeouts`
3134
3135 :type checksum: str
3136 :param checksum:
3137 (Optional) The type of checksum to compute to verify
3138 the integrity of the object. If the upload is completed in a single
3139 request, the checksum will be entirely precomputed and the remote
3140 server will handle verification and error handling. If the upload
3141 is too large and must be transmitted in multiple requests, the
3142 checksum will be incrementally computed and the client will handle
3143 verification and error handling, raising
3144 google.cloud.storage.exceptions.DataCorruption on a mismatch and
3145 attempting to delete the corrupted file. Supported values are
3146 "md5", "crc32c", "auto" and None. The default is "auto", which will
3147 try to detect if the C extension for crc32c is installed and fall
3148 back to md5 otherwise.
3149
3150 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3151 :param retry: (Optional) How to retry the RPC. A None value will disable
3152 retries. A google.api_core.retry.Retry value will enable retries,
3153 and the object will define retriable response codes and errors and
3154 configure backoff and timeout options.
3155
3156 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3157 Retry object and activates it only if certain conditions are met.
3158 This class exists to provide safe defaults for RPC calls that are
3159 not technically safe to retry normally (due to potential data
3160 duplication or other side-effects) but become safe to retry if a
3161 condition such as if_generation_match is set.
3162
3163 See the retry.py source code and docstrings in this package
3164 (google.cloud.storage.retry) for information on retry types and how
3165 to configure them.
3166
3167 :type crc32c_checksum_value: str
3168 :param crc32c_checksum_value: (Optional) This should be the checksum of
3169 the entire contents of `filename`. Applicable while uploading object
3170 greater than `_MAX_MULTIPART_SIZE` bytes.
3171
3172 It can be obtained by running
3173
3174 `gcloud storage hash /path/to/your/file`
3175
3176 or
3177
3178 .. code-block:: python
3179
3180 import google_crc32c
3181 import base64
3182
3183 data = b"Hello, world!"
3184 crc32c_int = google_crc32c.value(data)
3185 crc32c_hex = f"{crc32c_int:08x}"
3186 crc32c_bytes = crc32c_int.to_bytes(4, "big")
3187 base64_encoded = base64.b64encode(crc32c_bytes)
3188 crc32c_base64 = base64_encoded.decode("utf-8")
3189
3190 print(crc32c_base64)
3191
3192 Above code block prints 8 char string of base64 encoded big-endian
3193 bytes of 32 bit CRC32c integer.
3194
3195 More details on CRC32c can be found in Appendix B:
3196 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
3197 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
3198 """
3199 with create_trace_span(name="Storage.Blob.uploadFromFilename"):
3200 self._handle_filename_and_upload(
3201 filename,
3202 content_type=content_type,
3203 client=client,
3204 predefined_acl=predefined_acl,
3205 if_generation_match=if_generation_match,
3206 if_generation_not_match=if_generation_not_match,
3207 if_metageneration_match=if_metageneration_match,
3208 if_metageneration_not_match=if_metageneration_not_match,
3209 timeout=timeout,
3210 checksum=checksum,
3211 retry=retry,
3212 crc32c_checksum_value=crc32c_checksum_value,
3213 )
3214
3215 def upload_from_string(
3216 self,
3217 data,
3218 content_type="text/plain",
3219 client=None,
3220 predefined_acl=None,
3221 if_generation_match=None,
3222 if_generation_not_match=None,
3223 if_metageneration_match=None,
3224 if_metageneration_not_match=None,
3225 timeout=_DEFAULT_TIMEOUT,
3226 checksum="auto",
3227 retry=DEFAULT_RETRY,
3228 crc32c_checksum_value=None,
3229 ):
3230 """Upload contents of this blob from the provided string.
3231
3232 .. note::
3233 The effect of uploading to an existing blob depends on the
3234 "versioning" and "lifecycle" policies defined on the blob's
3235 bucket. In the absence of those policies, upload will
3236 overwrite any existing contents.
3237
3238 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3239 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3240 API documents for details.
3241
3242 If :attr:`user_project` is set on the bucket, bills the API request
3243 to that project.
3244
3245 :type data: bytes or str
3246 :param data:
3247 The data to store in this blob. If the value is text, it will be
3248 encoded as UTF-8.
3249
3250 :type content_type: str
3251 :param content_type:
3252 (Optional) Type of content being uploaded. Defaults to
3253 ``'text/plain'``.
3254
3255 :type client: :class:`~google.cloud.storage.client.Client`
3256 :param client:
3257 (Optional) The client to use. If not passed, falls back to the
3258 ``client`` stored on the blob's bucket.
3259
3260 :type predefined_acl: str
3261 :param predefined_acl: (Optional) Predefined access control list
3262
3263 :type if_generation_match: long
3264 :param if_generation_match:
3265 (Optional) See :ref:`using-if-generation-match`
3266
3267 :type if_generation_not_match: long
3268 :param if_generation_not_match:
3269 (Optional) See :ref:`using-if-generation-not-match`
3270
3271 :type if_metageneration_match: long
3272 :param if_metageneration_match:
3273 (Optional) See :ref:`using-if-metageneration-match`
3274
3275 :type if_metageneration_not_match: long
3276 :param if_metageneration_not_match:
3277 (Optional) See :ref:`using-if-metageneration-not-match`
3278
3279 :type timeout: float or tuple
3280 :param timeout:
3281 (Optional) The amount of time, in seconds, to wait
3282 for the server response. See: :ref:`configuring_timeouts`
3283
3284 :type checksum: str
3285 :param checksum:
3286 (Optional) The type of checksum to compute to verify
3287 the integrity of the object. If the upload is completed in a single
3288 request, the checksum will be entirely precomputed and the remote
3289 server will handle verification and error handling. If the upload
3290 is too large and must be transmitted in multiple requests, the
3291 checksum will be incrementally computed and the client will handle
3292 verification and error handling, raising
3293 google.cloud.storage.exceptions.DataCorruption on a mismatch and
3294 attempting to delete the corrupted file. Supported values are
3295 "md5", "crc32c", "auto" and None. The default is "auto", which will
3296 try to detect if the C extension for crc32c is installed and fall
3297 back to md5 otherwise.
3298
3299 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3300 :param retry: (Optional) How to retry the RPC. A None value will disable
3301 retries. A google.api_core.retry.Retry value will enable retries,
3302 and the object will define retriable response codes and errors and
3303 configure backoff and timeout options.
3304
3305 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3306 Retry object and activates it only if certain conditions are met.
3307 This class exists to provide safe defaults for RPC calls that are
3308 not technically safe to retry normally (due to potential data
3309 duplication or other side-effects) but become safe to retry if a
3310 condition such as if_generation_match is set.
3311
3312 See the retry.py source code and docstrings in this package
3313 (google.cloud.storage.retry) for information on retry types and how
3314 to configure them.
3315
3316 :type crc32c_checksum_value: str
3317 :param crc32c_checksum_value: (Optional) This should be the checksum of
3318 the entire contents of `file_obj`. Applicable while uploading object
3319 greater than `_MAX_MULTIPART_SIZE` bytes.
3320
3321 It can be obtained by running
3322
3323 `gcloud storage hash /path/to/your/file`
3324
3325 or
3326
3327 .. code-block:: python
3328
3329 import google_crc32c
3330 import base64
3331
3332 data = b"Hello, world!"
3333 crc32c_int = google_crc32c.value(data)
3334 crc32c_hex = f"{crc32c_int:08x}"
3335 crc32c_bytes = crc32c_int.to_bytes(4, "big")
3336 base64_encoded = base64.b64encode(crc32c_bytes)
3337 crc32c_base64 = base64_encoded.decode("utf-8")
3338
3339 print(crc32c_base64)
3340
3341 Above code block prints 8 char string of base64 encoded big-endian
3342 bytes of 32 bit CRC32c integer.
3343
3344 More details on CRC32c can be found in Appendix B:
3345 https://datatracker.ietf.org/doc/html/rfc4960#appendix-B and
3346 base64: https://datatracker.ietf.org/doc/html/rfc4648#section-4
3347 """
3348 with create_trace_span(name="Storage.Blob.uploadFromString"):
3349 data = _to_bytes(data, encoding="utf-8")
3350 string_buffer = BytesIO(data)
3351 self.upload_from_file(
3352 file_obj=string_buffer,
3353 size=len(data),
3354 content_type=content_type,
3355 client=client,
3356 predefined_acl=predefined_acl,
3357 if_generation_match=if_generation_match,
3358 if_generation_not_match=if_generation_not_match,
3359 if_metageneration_match=if_metageneration_match,
3360 if_metageneration_not_match=if_metageneration_not_match,
3361 timeout=timeout,
3362 checksum=checksum,
3363 retry=retry,
3364 crc32c_checksum_value=crc32c_checksum_value,
3365 )
3366
3367 def create_resumable_upload_session(
3368 self,
3369 content_type=None,
3370 size=None,
3371 origin=None,
3372 client=None,
3373 timeout=_DEFAULT_TIMEOUT,
3374 checksum="auto",
3375 predefined_acl=None,
3376 if_generation_match=None,
3377 if_generation_not_match=None,
3378 if_metageneration_match=None,
3379 if_metageneration_not_match=None,
3380 retry=DEFAULT_RETRY,
3381 ):
3382 """Create a resumable upload session.
3383
3384 Resumable upload sessions allow you to start an upload session from
3385 one client and complete the session in another. This method is called
3386 by the initiator to set the metadata and limits. The initiator then
3387 passes the session URL to the client that will upload the binary data.
3388 The client performs a PUT request on the session URL to complete the
3389 upload. This process allows untrusted clients to upload to an
3390 access-controlled bucket.
3391
3392 For more details, see the
3393 documentation on [`signed URLs`](https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable).
3394
3395 The content type of the upload will be determined in order
3396 of precedence:
3397
3398 - The value passed in to this method (if not :data:`None`)
3399 - The value stored on the current blob
3400 - The default value ('application/octet-stream')
3401
3402 .. note::
3403 The effect of uploading to an existing blob depends on the
3404 "versioning" and "lifecycle" policies defined on the blob's
3405 bucket. In the absence of those policies, upload will
3406 overwrite any existing contents.
3407
3408 See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning)
3409 and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle)
3410 API documents for details.
3411
3412 If :attr:`encryption_key` is set, the blob will be encrypted with
3413 a [`customer-supplied`](https://cloud.google.com/storage/docs/encryption#customer-supplied)
3414 encryption key.
3415
3416 If :attr:`user_project` is set on the bucket, bills the API request
3417 to that project.
3418
3419 :type size: int
3420 :param size:
3421 (Optional) The maximum number of bytes that can be uploaded using
3422 this session. If the size is not known when creating the session,
3423 this should be left blank.
3424
3425 :type content_type: str
3426 :param content_type: (Optional) Type of content being uploaded.
3427
3428 :type origin: str
3429 :param origin:
3430 (Optional) If set, the upload can only be completed by a user-agent
3431 that uploads from the given origin. This can be useful when passing
3432 the session to a web client.
3433
3434 :type client: :class:`~google.cloud.storage.client.Client`
3435 :param client:
3436 (Optional) The client to use. If not passed, falls back to the
3437 ``client`` stored on the blob's bucket.
3438
3439 :type timeout: float or tuple
3440 :param timeout:
3441 (Optional) The amount of time, in seconds, to wait
3442 for the server response. See: :ref:`configuring_timeouts`
3443
3444 :type checksum: str
3445 :param checksum:
3446 (Optional) The type of checksum to compute to verify
3447 the integrity of the object. After the upload is complete, the
3448 server-computed checksum of the resulting object will be checked
3449 and google.cloud.storage.exceptions.DataCorruption will be raised on
3450 a mismatch. On a validation failure, the client will attempt to
3451 delete the uploaded object automatically. Supported values are
3452 "md5", "crc32c", "auto" and None. The default is "auto", which will
3453 try to detect if the C extension for crc32c is installed and fall
3454 back to md5 otherwise.
3455
3456 :type predefined_acl: str
3457 :param predefined_acl: (Optional) Predefined access control list
3458
3459 :type if_generation_match: long
3460 :param if_generation_match:
3461 (Optional) See :ref:`using-if-generation-match`
3462
3463 :type if_generation_not_match: long
3464 :param if_generation_not_match:
3465 (Optional) See :ref:`using-if-generation-not-match`
3466
3467 :type if_metageneration_match: long
3468 :param if_metageneration_match:
3469 (Optional) See :ref:`using-if-metageneration-match`
3470
3471 :type if_metageneration_not_match: long
3472 :param if_metageneration_not_match:
3473 (Optional) See :ref:`using-if-metageneration-not-match`
3474
3475 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3476 :param retry: (Optional) How to retry the RPC. A None value will disable
3477 retries. A google.api_core.retry.Retry value will enable retries,
3478 and the object will define retriable response codes and errors and
3479 configure backoff and timeout options.
3480
3481 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
3482 Retry object and activates it only if certain conditions are met.
3483 This class exists to provide safe defaults for RPC calls that are
3484 not technically safe to retry normally (due to potential data
3485 duplication or other side-effects) but become safe to retry if a
3486 condition such as if_generation_match is set.
3487
3488 See the retry.py source code and docstrings in this package
3489 (google.cloud.storage.retry) for information on retry types and how
3490 to configure them.
3491
3492 :rtype: str
3493 :returns: The resumable upload session URL. The upload can be
3494 completed by making an HTTP PUT request with the
3495 file's contents.
3496
3497 :raises: :class:`google.cloud.exceptions.GoogleCloudError`
3498 if the session creation response returns an error status.
3499 """
3500 with create_trace_span(name="Storage.Blob.createResumableUploadSession"):
3501 # Handle ConditionalRetryPolicy.
3502 if isinstance(retry, ConditionalRetryPolicy):
3503 # Conditional retries are designed for non-media calls, which change
3504 # arguments into query_params dictionaries. Media operations work
3505 # differently, so here we make a "fake" query_params to feed to the
3506 # ConditionalRetryPolicy.
3507 query_params = {
3508 "ifGenerationMatch": if_generation_match,
3509 "ifMetagenerationMatch": if_metageneration_match,
3510 }
3511 retry = retry.get_retry_policy_if_conditions_met(
3512 query_params=query_params
3513 )
3514
3515 extra_headers = {}
3516 if origin is not None:
3517 # This header is specifically for client-side uploads, it
3518 # determines the origins allowed for CORS.
3519 extra_headers["Origin"] = origin
3520
3521 try:
3522 fake_stream = BytesIO(b"")
3523 # Send a fake the chunk size which we **know** will be acceptable
3524 # to the `ResumableUpload` constructor. The chunk size only
3525 # matters when **sending** bytes to an upload.
3526 upload, _ = self._initiate_resumable_upload(
3527 client,
3528 fake_stream,
3529 content_type,
3530 size,
3531 predefined_acl=predefined_acl,
3532 if_generation_match=if_generation_match,
3533 if_generation_not_match=if_generation_not_match,
3534 if_metageneration_match=if_metageneration_match,
3535 if_metageneration_not_match=if_metageneration_not_match,
3536 extra_headers=extra_headers,
3537 chunk_size=self._CHUNK_SIZE_MULTIPLE,
3538 timeout=timeout,
3539 checksum=checksum,
3540 retry=retry,
3541 )
3542
3543 return upload.resumable_url
3544 except InvalidResponse as exc:
3545 _raise_from_invalid_response(exc)
3546
3547 def get_iam_policy(
3548 self,
3549 client=None,
3550 requested_policy_version=None,
3551 timeout=_DEFAULT_TIMEOUT,
3552 retry=DEFAULT_RETRY,
3553 ):
3554 """Retrieve the IAM policy for the object.
3555
3556 .. note::
3557
3558 Blob- / object-level IAM support does not yet exist and methods
3559 currently call an internal ACL backend not providing any utility
3560 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3561 in the future and is currently undocumented. Use :attr:`acl` for
3562 managing object access control.
3563
3564 If :attr:`user_project` is set on the bucket, bills the API request
3565 to that project.
3566
3567 :type client: :class:`~google.cloud.storage.client.Client`
3568 :param client:
3569 (Optional) The client to use. If not passed, falls back to the
3570 ``client`` stored on the current object's bucket.
3571
3572 :type requested_policy_version: int or ``NoneType``
3573 :param requested_policy_version:
3574 (Optional) The version of IAM policies to request. If a policy
3575 with a condition is requested without setting this, the server will
3576 return an error. This must be set to a value of 3 to retrieve IAM
3577 policies containing conditions. This is to prevent client code that
3578 isn't aware of IAM conditions from interpreting and modifying
3579 policies incorrectly. The service might return a policy with
3580 version lower than the one that was requested, based on the feature
3581 syntax in the policy fetched.
3582
3583 :type timeout: float or tuple
3584 :param timeout:
3585 (Optional) The amount of time, in seconds, to wait
3586 for the server response. See: :ref:`configuring_timeouts`
3587
3588 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3589 :param retry:
3590 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3591
3592 :rtype: :class:`google.api_core.iam.Policy`
3593 :returns: the policy instance, based on the resource returned from
3594 the ``getIamPolicy`` API request.
3595 """
3596 with create_trace_span(name="Storage.Blob.getIamPolicy"):
3597 client = self._require_client(client)
3598
3599 query_params = {}
3600
3601 if self.user_project is not None:
3602 query_params["userProject"] = self.user_project
3603
3604 if requested_policy_version is not None:
3605 query_params["optionsRequestedPolicyVersion"] = requested_policy_version
3606
3607 info = client._get_resource(
3608 f"{self.path}/iam",
3609 query_params=query_params,
3610 timeout=timeout,
3611 retry=retry,
3612 _target_object=None,
3613 )
3614 return Policy.from_api_repr(info)
3615
3616 def set_iam_policy(
3617 self,
3618 policy,
3619 client=None,
3620 timeout=_DEFAULT_TIMEOUT,
3621 retry=DEFAULT_RETRY_IF_ETAG_IN_JSON,
3622 ):
3623 """Update the IAM policy for the bucket.
3624
3625 .. note::
3626
3627 Blob- / object-level IAM support does not yet exist and methods
3628 currently call an internal ACL backend not providing any utility
3629 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3630 in the future and is currently undocumented. Use :attr:`acl` for
3631 managing object access control.
3632
3633 If :attr:`user_project` is set on the bucket, bills the API request
3634 to that project.
3635
3636 :type policy: :class:`google.api_core.iam.Policy`
3637 :param policy: policy instance used to update bucket's IAM policy.
3638
3639 :type client: :class:`~google.cloud.storage.client.Client`
3640 :param client:
3641 (Optional) The client to use. If not passed, falls back to the
3642 ``client`` stored on the current bucket.
3643
3644 :type timeout: float or tuple
3645 :param timeout:
3646 (Optional) The amount of time, in seconds, to wait
3647 for the server response. See: :ref:`configuring_timeouts`
3648
3649 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3650 :param retry:
3651 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3652
3653 :rtype: :class:`google.api_core.iam.Policy`
3654 :returns: the policy instance, based on the resource returned from
3655 the ``setIamPolicy`` API request.
3656 """
3657 with create_trace_span(name="Storage.Blob.setIamPolicy"):
3658 client = self._require_client(client)
3659
3660 query_params = {}
3661
3662 if self.user_project is not None:
3663 query_params["userProject"] = self.user_project
3664
3665 path = f"{self.path}/iam"
3666 resource = policy.to_api_repr()
3667 resource["resourceId"] = self.path
3668 info = client._put_resource(
3669 path,
3670 resource,
3671 query_params=query_params,
3672 timeout=timeout,
3673 retry=retry,
3674 _target_object=None,
3675 )
3676 return Policy.from_api_repr(info)
3677
3678 def test_iam_permissions(
3679 self,
3680 permissions,
3681 client=None,
3682 timeout=_DEFAULT_TIMEOUT,
3683 retry=DEFAULT_RETRY,
3684 ):
3685 """API call: test permissions
3686
3687 .. note::
3688
3689 Blob- / object-level IAM support does not yet exist and methods
3690 currently call an internal ACL backend not providing any utility
3691 beyond the blob's :attr:`acl` at this time. The API may be enhanced
3692 in the future and is currently undocumented. Use :attr:`acl` for
3693 managing object access control.
3694
3695 If :attr:`user_project` is set on the bucket, bills the API request
3696 to that project.
3697
3698 :type permissions: list of string
3699 :param permissions: the permissions to check
3700
3701 :type client: :class:`~google.cloud.storage.client.Client`
3702 :param client:
3703 (Optional) The client to use. If not passed, falls back to the
3704 ``client`` stored on the current bucket.
3705
3706 :type timeout: float or tuple
3707 :param timeout:
3708 (Optional) The amount of time, in seconds, to wait
3709 for the server response. See: :ref:`configuring_timeouts`
3710
3711 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3712 :param retry:
3713 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3714
3715 :rtype: list of string
3716 :returns: the permissions returned by the ``testIamPermissions`` API
3717 request.
3718 """
3719 with create_trace_span(name="Storage.Blob.testIamPermissions"):
3720 client = self._require_client(client)
3721 query_params = {"permissions": permissions}
3722
3723 if self.user_project is not None:
3724 query_params["userProject"] = self.user_project
3725
3726 path = f"{self.path}/iam/testPermissions"
3727 resp = client._get_resource(
3728 path,
3729 query_params=query_params,
3730 timeout=timeout,
3731 retry=retry,
3732 _target_object=None,
3733 )
3734
3735 return resp.get("permissions", [])
3736
3737 def make_public(
3738 self,
3739 client=None,
3740 timeout=_DEFAULT_TIMEOUT,
3741 if_generation_match=None,
3742 if_generation_not_match=None,
3743 if_metageneration_match=None,
3744 if_metageneration_not_match=None,
3745 retry=DEFAULT_RETRY,
3746 ):
3747 """Update blob's ACL, granting read access to anonymous users.
3748
3749 :type client: :class:`~google.cloud.storage.client.Client` or
3750 ``NoneType``
3751 :param client: (Optional) The client to use. If not passed, falls back
3752 to the ``client`` stored on the blob's bucket.
3753
3754 :type timeout: float or tuple
3755 :param timeout:
3756 (Optional) The amount of time, in seconds, to wait
3757 for the server response. See: :ref:`configuring_timeouts`
3758
3759 :type if_generation_match: long
3760 :param if_generation_match:
3761 (Optional) See :ref:`using-if-generation-match`
3762
3763 :type if_generation_not_match: long
3764 :param if_generation_not_match:
3765 (Optional) See :ref:`using-if-generation-not-match`
3766
3767 :type if_metageneration_match: long
3768 :param if_metageneration_match:
3769 (Optional) See :ref:`using-if-metageneration-match`
3770
3771 :type if_metageneration_not_match: long
3772 :param if_metageneration_not_match:
3773 (Optional) See :ref:`using-if-metageneration-not-match`
3774
3775 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3776 :param retry:
3777 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3778 """
3779 with create_trace_span(name="Storage.Blob.makePublic"):
3780 self.acl.all().grant_read()
3781 self.acl.save(
3782 client=client,
3783 timeout=timeout,
3784 if_generation_match=if_generation_match,
3785 if_generation_not_match=if_generation_not_match,
3786 if_metageneration_match=if_metageneration_match,
3787 if_metageneration_not_match=if_metageneration_not_match,
3788 retry=retry,
3789 )
3790
3791 def make_private(
3792 self,
3793 client=None,
3794 timeout=_DEFAULT_TIMEOUT,
3795 if_generation_match=None,
3796 if_generation_not_match=None,
3797 if_metageneration_match=None,
3798 if_metageneration_not_match=None,
3799 retry=DEFAULT_RETRY,
3800 ):
3801 """Update blob's ACL, revoking read access for anonymous users.
3802
3803 :type client: :class:`~google.cloud.storage.client.Client` or
3804 ``NoneType``
3805 :param client: (Optional) The client to use. If not passed, falls back
3806 to the ``client`` stored on the blob's bucket.
3807
3808 :type timeout: float or tuple
3809 :param timeout:
3810 (Optional) The amount of time, in seconds, to wait
3811 for the server response. See: :ref:`configuring_timeouts`
3812
3813 :type if_generation_match: long
3814 :param if_generation_match:
3815 (Optional) See :ref:`using-if-generation-match`
3816
3817 :type if_generation_not_match: long
3818 :param if_generation_not_match:
3819 (Optional) See :ref:`using-if-generation-not-match`
3820
3821 :type if_metageneration_match: long
3822 :param if_metageneration_match:
3823 (Optional) See :ref:`using-if-metageneration-match`
3824
3825 :type if_metageneration_not_match: long
3826 :param if_metageneration_not_match:
3827 (Optional) See :ref:`using-if-metageneration-not-match`
3828
3829 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3830 :param retry:
3831 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
3832 """
3833 with create_trace_span(name="Storage.Blob.makePrivate"):
3834 self.acl.all().revoke_read()
3835 self.acl.save(
3836 client=client,
3837 timeout=timeout,
3838 if_generation_match=if_generation_match,
3839 if_generation_not_match=if_generation_not_match,
3840 if_metageneration_match=if_metageneration_match,
3841 if_metageneration_not_match=if_metageneration_not_match,
3842 retry=retry,
3843 )
3844
3845 def compose(
3846 self,
3847 sources,
3848 client=None,
3849 timeout=_DEFAULT_TIMEOUT,
3850 if_generation_match=None,
3851 if_metageneration_match=None,
3852 if_source_generation_match=None,
3853 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3854 ):
3855 """Concatenate source blobs into this one.
3856
3857 If :attr:`user_project` is set on the bucket, bills the API request
3858 to that project.
3859
3860 See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/objects/compose)
3861 and a [code sample](https://cloud.google.com/storage/docs/samples/storage-compose-file#storage_compose_file-python).
3862
3863 :type sources: list of :class:`Blob`
3864 :param sources: Blobs whose contents will be composed into this blob.
3865
3866 :type client: :class:`~google.cloud.storage.client.Client`
3867 :param client:
3868 (Optional) The client to use. If not passed, falls back to the
3869 ``client`` stored on the blob's bucket.
3870
3871 :type timeout: float or tuple
3872 :param timeout:
3873 (Optional) The amount of time, in seconds, to wait
3874 for the server response. See: :ref:`configuring_timeouts`
3875
3876 :type if_generation_match: long
3877 :param if_generation_match:
3878 (Optional) Makes the operation conditional on whether the
3879 destination object's current generation matches the given value.
3880 Setting to 0 makes the operation succeed only if there are no live
3881 versions of the object.
3882 Note: In a previous version, this argument worked identically to the
3883 ``if_source_generation_match`` argument. For
3884 backwards-compatibility reasons, if a list is passed in,
3885 this argument will behave like ``if_source_generation_match``
3886 and also issue a DeprecationWarning.
3887
3888 :type if_metageneration_match: long
3889 :param if_metageneration_match:
3890 (Optional) Makes the operation conditional on whether the
3891 destination object's current metageneration matches the given
3892 value.
3893
3894 If a list of long is passed in, no match operation will be
3895 performed. (Deprecated: type(list of long) is supported for
3896 backwards-compatability reasons only.)
3897
3898 :type if_source_generation_match: list of long
3899 :param if_source_generation_match:
3900 (Optional) Makes the operation conditional on whether the current
3901 generation of each source blob matches the corresponding generation.
3902 The list must match ``sources`` item-to-item.
3903
3904 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
3905 :param retry:
3906 (Optional) How to retry the RPC.
3907 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
3908 policy which will only enable retries if ``if_generation_match`` or ``generation``
3909 is set, in order to ensure requests are idempotent before retrying them.
3910 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
3911 to enable retries regardless of generation precondition setting.
3912 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
3913 """
3914 with create_trace_span(name="Storage.Blob.compose"):
3915 sources_len = len(sources)
3916 client = self._require_client(client)
3917 query_params = {}
3918
3919 if isinstance(if_generation_match, list):
3920 warnings.warn(
3921 _COMPOSE_IF_GENERATION_LIST_DEPRECATED,
3922 DeprecationWarning,
3923 stacklevel=2,
3924 )
3925
3926 if if_source_generation_match is not None:
3927 raise ValueError(
3928 _COMPOSE_IF_GENERATION_LIST_AND_IF_SOURCE_GENERATION_ERROR
3929 )
3930
3931 if_source_generation_match = if_generation_match
3932 if_generation_match = None
3933
3934 if isinstance(if_metageneration_match, list):
3935 warnings.warn(
3936 _COMPOSE_IF_METAGENERATION_LIST_DEPRECATED,
3937 DeprecationWarning,
3938 stacklevel=2,
3939 )
3940
3941 if_metageneration_match = None
3942
3943 if if_source_generation_match is None:
3944 if_source_generation_match = [None] * sources_len
3945 if len(if_source_generation_match) != sources_len:
3946 raise ValueError(_COMPOSE_IF_SOURCE_GENERATION_MISMATCH_ERROR)
3947
3948 source_objects = []
3949 for source, source_generation in zip(sources, if_source_generation_match):
3950 source_object = {
3951 "name": source.name,
3952 "generation": source.generation,
3953 }
3954
3955 preconditions = {}
3956 if source_generation is not None:
3957 preconditions["ifGenerationMatch"] = source_generation
3958
3959 if preconditions:
3960 source_object["objectPreconditions"] = preconditions
3961
3962 source_objects.append(source_object)
3963
3964 request = {
3965 "sourceObjects": source_objects,
3966 "destination": self._properties.copy(),
3967 }
3968
3969 if self.user_project is not None:
3970 query_params["userProject"] = self.user_project
3971
3972 _add_generation_match_parameters(
3973 query_params,
3974 if_generation_match=if_generation_match,
3975 if_metageneration_match=if_metageneration_match,
3976 )
3977
3978 api_response = client._post_resource(
3979 f"{self.path}/compose",
3980 request,
3981 query_params=query_params,
3982 timeout=timeout,
3983 retry=retry,
3984 _target_object=self,
3985 )
3986 self._set_properties(api_response)
3987
3988 def rewrite(
3989 self,
3990 source,
3991 token=None,
3992 client=None,
3993 if_generation_match=None,
3994 if_generation_not_match=None,
3995 if_metageneration_match=None,
3996 if_metageneration_not_match=None,
3997 if_source_generation_match=None,
3998 if_source_generation_not_match=None,
3999 if_source_metageneration_match=None,
4000 if_source_metageneration_not_match=None,
4001 timeout=_DEFAULT_TIMEOUT,
4002 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
4003 ):
4004 """Rewrite source blob into this one.
4005
4006 If :attr:`user_project` is set on the bucket, bills the API request
4007 to that project.
4008
4009 .. note::
4010
4011 ``rewrite`` is not supported in a ``Batch`` context.
4012
4013 :type source: :class:`Blob`
4014 :param source: blob whose contents will be rewritten into this blob.
4015
4016 :type token: str
4017 :param token:
4018 (Optional) Token returned from an earlier, not-completed call to
4019 rewrite the same source blob. If passed, result will include
4020 updated status, total bytes written.
4021
4022 :type client: :class:`~google.cloud.storage.client.Client`
4023 :param client:
4024 (Optional) The client to use. If not passed, falls back to the
4025 ``client`` stored on the blob's bucket.
4026
4027 :type if_generation_match: long
4028 :param if_generation_match:
4029 (Optional) See :ref:`using-if-generation-match`
4030 Note that the generation to be matched is that of the
4031 ``destination`` blob.
4032
4033 :type if_generation_not_match: long
4034 :param if_generation_not_match:
4035 (Optional) See :ref:`using-if-generation-not-match`
4036 Note that the generation to be matched is that of the
4037 ``destination`` blob.
4038
4039 :type if_metageneration_match: long
4040 :param if_metageneration_match:
4041 (Optional) See :ref:`using-if-metageneration-match`
4042 Note that the metageneration to be matched is that of the
4043 ``destination`` blob.
4044
4045 :type if_metageneration_not_match: long
4046 :param if_metageneration_not_match:
4047 (Optional) See :ref:`using-if-metageneration-not-match`
4048 Note that the metageneration to be matched is that of the
4049 ``destination`` blob.
4050
4051 :type if_source_generation_match: long
4052 :param if_source_generation_match:
4053 (Optional) Makes the operation conditional on whether the source
4054 object's generation matches the given value.
4055
4056 :type if_source_generation_not_match: long
4057 :param if_source_generation_not_match:
4058 (Optional) Makes the operation conditional on whether the source
4059 object's generation does not match the given value.
4060
4061 :type if_source_metageneration_match: long
4062 :param if_source_metageneration_match:
4063 (Optional) Makes the operation conditional on whether the source
4064 object's current metageneration matches the given value.
4065
4066 :type if_source_metageneration_not_match: long
4067 :param if_source_metageneration_not_match:
4068 (Optional) Makes the operation conditional on whether the source
4069 object's current metageneration does not match the given value.
4070
4071 :type timeout: float or tuple
4072 :param timeout:
4073 (Optional) The amount of time, in seconds, to wait
4074 for the server response. See: :ref:`configuring_timeouts`
4075
4076 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4077 :param retry:
4078 (Optional) How to retry the RPC.
4079 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
4080 policy which will only enable retries if ``if_generation_match`` or ``generation``
4081 is set, in order to ensure requests are idempotent before retrying them.
4082 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
4083 to enable retries regardless of generation precondition setting.
4084 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
4085
4086 :rtype: tuple
4087 :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
4088 is a rewrite token (``None`` if the rewrite is complete),
4089 ``bytes_rewritten`` is the number of bytes rewritten so far,
4090 and ``total_bytes`` is the total number of bytes to be
4091 rewritten.
4092 """
4093 with create_trace_span(name="Storage.Blob.rewrite"):
4094 client = self._require_client(client)
4095 headers = _get_encryption_headers(self._encryption_key)
4096 headers.update(_get_encryption_headers(source._encryption_key, source=True))
4097
4098 query_params = self._query_params
4099 if "generation" in query_params:
4100 del query_params["generation"]
4101
4102 if token:
4103 query_params["rewriteToken"] = token
4104
4105 if source.generation:
4106 query_params["sourceGeneration"] = source.generation
4107
4108 # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object
4109 # at rest, object resource metadata will store the version of the Key Management
4110 # Service cryptographic material. If a Blob instance with KMS Key metadata set is
4111 # used to rewrite the object, then the existing kmsKeyName version
4112 # value can't be used in the rewrite request and the client instead ignores it.
4113 if (
4114 self.kms_key_name is not None
4115 and "cryptoKeyVersions" not in self.kms_key_name
4116 ):
4117 query_params["destinationKmsKeyName"] = self.kms_key_name
4118
4119 _add_generation_match_parameters(
4120 query_params,
4121 if_generation_match=if_generation_match,
4122 if_generation_not_match=if_generation_not_match,
4123 if_metageneration_match=if_metageneration_match,
4124 if_metageneration_not_match=if_metageneration_not_match,
4125 if_source_generation_match=if_source_generation_match,
4126 if_source_generation_not_match=if_source_generation_not_match,
4127 if_source_metageneration_match=if_source_metageneration_match,
4128 if_source_metageneration_not_match=if_source_metageneration_not_match,
4129 )
4130
4131 path = f"{source.path}/rewriteTo{self.path}"
4132 api_response = client._post_resource(
4133 path,
4134 self._properties,
4135 query_params=query_params,
4136 headers=headers,
4137 timeout=timeout,
4138 retry=retry,
4139 _target_object=self,
4140 )
4141 rewritten = int(api_response["totalBytesRewritten"])
4142 size = int(api_response["objectSize"])
4143
4144 # The resource key is set if and only if the API response is
4145 # completely done. Additionally, there is no rewrite token to return
4146 # in this case.
4147 if api_response["done"]:
4148 self._set_properties(api_response["resource"])
4149 return None, rewritten, size
4150
4151 return api_response["rewriteToken"], rewritten, size
4152
4153 def update_storage_class(
4154 self,
4155 new_class,
4156 client=None,
4157 if_generation_match=None,
4158 if_generation_not_match=None,
4159 if_metageneration_match=None,
4160 if_metageneration_not_match=None,
4161 if_source_generation_match=None,
4162 if_source_generation_not_match=None,
4163 if_source_metageneration_match=None,
4164 if_source_metageneration_not_match=None,
4165 timeout=_DEFAULT_TIMEOUT,
4166 retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
4167 ):
4168 """Update blob's storage class via a rewrite-in-place. This helper will
4169 wait for the rewrite to complete before returning, so it may take some
4170 time for large files.
4171
4172 See
4173 https://cloud.google.com/storage/docs/per-object-storage-class
4174
4175 If :attr:`user_project` is set on the bucket, bills the API request
4176 to that project.
4177
4178 :type new_class: str
4179 :param new_class:
4180 new storage class for the object. One of:
4181 :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
4182 :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
4183 :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
4184 :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
4185 :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
4186 or
4187 :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`.
4188
4189 :type client: :class:`~google.cloud.storage.client.Client`
4190 :param client:
4191 (Optional) The client to use. If not passed, falls back to the
4192 ``client`` stored on the blob's bucket.
4193
4194 :type if_generation_match: long
4195 :param if_generation_match:
4196 (Optional) See :ref:`using-if-generation-match`
4197 Note that the generation to be matched is that of the
4198 ``destination`` blob.
4199
4200 :type if_generation_not_match: long
4201 :param if_generation_not_match:
4202 (Optional) See :ref:`using-if-generation-not-match`
4203 Note that the generation to be matched is that of the
4204 ``destination`` blob.
4205
4206 :type if_metageneration_match: long
4207 :param if_metageneration_match:
4208 (Optional) See :ref:`using-if-metageneration-match`
4209 Note that the metageneration to be matched is that of the
4210 ``destination`` blob.
4211
4212 :type if_metageneration_not_match: long
4213 :param if_metageneration_not_match:
4214 (Optional) See :ref:`using-if-metageneration-not-match`
4215 Note that the metageneration to be matched is that of the
4216 ``destination`` blob.
4217
4218 :type if_source_generation_match: long
4219 :param if_source_generation_match:
4220 (Optional) Makes the operation conditional on whether the source
4221 object's generation matches the given value.
4222
4223 :type if_source_generation_not_match: long
4224 :param if_source_generation_not_match:
4225 (Optional) Makes the operation conditional on whether the source
4226 object's generation does not match the given value.
4227
4228 :type if_source_metageneration_match: long
4229 :param if_source_metageneration_match:
4230 (Optional) Makes the operation conditional on whether the source
4231 object's current metageneration matches the given value.
4232
4233 :type if_source_metageneration_not_match: long
4234 :param if_source_metageneration_not_match:
4235 (Optional) Makes the operation conditional on whether the source
4236 object's current metageneration does not match the given value.
4237
4238 :type timeout: float or tuple
4239 :param timeout:
4240 (Optional) The amount of time, in seconds, to wait
4241 for the server response. See: :ref:`configuring_timeouts`
4242
4243 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4244 :param retry:
4245 (Optional) How to retry the RPC.
4246 The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry
4247 policy which will only enable retries if ``if_generation_match`` or ``generation``
4248 is set, in order to ensure requests are idempotent before retrying them.
4249 Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object
4250 to enable retries regardless of generation precondition setting.
4251 See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
4252 """
4253 with create_trace_span(name="Storage.Blob.updateStorageClass"):
4254 # Update current blob's storage class prior to rewrite
4255 self._patch_property("storageClass", new_class)
4256
4257 # Execute consecutive rewrite operations until operation is done
4258 token, _, _ = self.rewrite(
4259 self,
4260 if_generation_match=if_generation_match,
4261 if_generation_not_match=if_generation_not_match,
4262 if_metageneration_match=if_metageneration_match,
4263 if_metageneration_not_match=if_metageneration_not_match,
4264 if_source_generation_match=if_source_generation_match,
4265 if_source_generation_not_match=if_source_generation_not_match,
4266 if_source_metageneration_match=if_source_metageneration_match,
4267 if_source_metageneration_not_match=if_source_metageneration_not_match,
4268 timeout=timeout,
4269 retry=retry,
4270 )
4271 while token is not None:
4272 token, _, _ = self.rewrite(
4273 self,
4274 token=token,
4275 if_generation_match=if_generation_match,
4276 if_generation_not_match=if_generation_not_match,
4277 if_metageneration_match=if_metageneration_match,
4278 if_metageneration_not_match=if_metageneration_not_match,
4279 if_source_generation_match=if_source_generation_match,
4280 if_source_generation_not_match=if_source_generation_not_match,
4281 if_source_metageneration_match=if_source_metageneration_match,
4282 if_source_metageneration_not_match=if_source_metageneration_not_match,
4283 timeout=timeout,
4284 retry=retry,
4285 )
4286
4287 def open(
4288 self,
4289 mode="r",
4290 chunk_size=None,
4291 ignore_flush=None,
4292 encoding=None,
4293 errors=None,
4294 newline=None,
4295 **kwargs,
4296 ):
4297 r"""Create a file handler for file-like I/O to or from this blob.
4298
4299 This method can be used as a context manager, just like Python's
4300 built-in 'open()' function.
4301
4302 While reading, as with other read methods, if blob.generation is not set
4303 the most recent blob generation will be used. Because the file-like IO
4304 reader downloads progressively in chunks, this could result in data from
4305 multiple versions being mixed together. If this is a concern, use
4306 either bucket.get_blob(), or blob.reload(), which will download the
4307 latest generation number and set it; or, if the generation is known, set
4308 it manually, for instance with bucket.blob(generation=123456).
4309
4310 Checksumming (hashing) to verify data integrity is disabled for reads
4311 using this feature because reads are implemented using request ranges,
4312 which do not provide checksums to validate. See
4313 https://cloud.google.com/storage/docs/hashes-etags for details.
4314
4315 See a [code sample](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_fileio_write_read.py).
4316
4317 Keyword arguments to pass to the underlying API calls.
4318 For both uploads and downloads, the following arguments are
4319 supported:
4320
4321 - ``if_generation_match``
4322 - ``if_generation_not_match``
4323 - ``if_metageneration_match``
4324 - ``if_metageneration_not_match``
4325 - ``timeout``
4326 - ``retry``
4327
4328 For downloads only, the following additional arguments are supported:
4329
4330 - ``raw_download``
4331 - ``single_shot_download``
4332
4333 For uploads only, the following additional arguments are supported:
4334
4335 - ``content_type``
4336 - ``predefined_acl``
4337 - ``checksum``
4338
4339 :type mode: str
4340 :param mode:
4341 (Optional) A mode string, as per standard Python `open()` semantics.The first
4342 character must be 'r', to open the blob for reading, or 'w' to open
4343 it for writing. The second character, if present, must be 't' for
4344 (unicode) text mode, or 'b' for bytes mode. If the second character
4345 is omitted, text mode is the default.
4346
4347 :type chunk_size: long
4348 :param chunk_size:
4349 (Optional) For reads, the minimum number of bytes to read at a time.
4350 If fewer bytes than the chunk_size are requested, the remainder is
4351 buffered. For writes, the maximum number of bytes to buffer before
4352 sending data to the server, and the size of each request when data
4353 is sent. Writes are implemented as a "resumable upload", so
4354 chunk_size for writes must be exactly a multiple of 256KiB as with
4355 other resumable uploads. The default is 40 MiB.
4356
4357 :type ignore_flush: bool
4358 :param ignore_flush:
4359 (Optional) For non text-mode writes, makes flush() do nothing
4360 instead of raising an error. flush() without closing is not
4361 supported by the remote service and therefore calling it normally
4362 results in io.UnsupportedOperation. However, that behavior is
4363 incompatible with some consumers and wrappers of file objects in
4364 Python, such as zipfile.ZipFile or io.TextIOWrapper. Setting
4365 ignore_flush will cause flush() to successfully do nothing, for
4366 compatibility with those contexts. The correct way to actually flush
4367 data to the remote server is to close() (using a context manager,
4368 such as in the example, will cause this to happen automatically).
4369
4370 :type encoding: str
4371 :param encoding:
4372 (Optional) For text mode only, the name of the encoding that the stream will
4373 be decoded or encoded with. If omitted, it defaults to
4374 locale.getpreferredencoding(False).
4375
4376 :type errors: str
4377 :param errors:
4378 (Optional) For text mode only, an optional string that specifies how encoding
4379 and decoding errors are to be handled. Pass 'strict' to raise a
4380 ValueError exception if there is an encoding error (the default of
4381 None has the same effect), or pass 'ignore' to ignore errors. (Note
4382 that ignoring encoding errors can lead to data loss.) Other more
4383 rarely-used options are also available; see the Python 'io' module
4384 documentation for 'io.TextIOWrapper' for a complete list.
4385
4386 :type newline: str
4387 :param newline:
4388 (Optional) For text mode only, controls how line endings are handled. It can
4389 be None, '', '\n', '\r', and '\r\n'. If None, reads use "universal
4390 newline mode" and writes use the system default. See the Python
4391 'io' module documentation for 'io.TextIOWrapper' for details.
4392
4393 :returns: A 'BlobReader' or 'BlobWriter' from
4394 'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one
4395 of those classes, depending on the 'mode' argument.
4396 """
4397 with create_trace_span(name="Storage.Blob.open"):
4398 if mode == "rb":
4399 if encoding or errors or newline:
4400 raise ValueError(
4401 "encoding, errors and newline arguments are for text mode only"
4402 )
4403 if ignore_flush:
4404 raise ValueError(
4405 "ignore_flush argument is for non-text write mode only"
4406 )
4407 return BlobReader(self, chunk_size=chunk_size, **kwargs)
4408 elif mode == "wb":
4409 if encoding or errors or newline:
4410 raise ValueError(
4411 "encoding, errors and newline arguments are for text mode only"
4412 )
4413 return BlobWriter(
4414 self,
4415 chunk_size=chunk_size,
4416 ignore_flush=ignore_flush,
4417 **kwargs,
4418 )
4419 elif mode in ("r", "rt"):
4420 if ignore_flush:
4421 raise ValueError(
4422 "ignore_flush argument is for non-text write mode only"
4423 )
4424 return TextIOWrapper(
4425 BlobReader(self, chunk_size=chunk_size, **kwargs),
4426 encoding=encoding,
4427 errors=errors,
4428 newline=newline,
4429 )
4430 elif mode in ("w", "wt"):
4431 if ignore_flush is False:
4432 raise ValueError(
4433 "ignore_flush is required for text mode writing and "
4434 "cannot be set to False"
4435 )
4436 return TextIOWrapper(
4437 BlobWriter(
4438 self, chunk_size=chunk_size, ignore_flush=True, **kwargs
4439 ),
4440 encoding=encoding,
4441 errors=errors,
4442 newline=newline,
4443 )
4444 else:
4445 raise NotImplementedError(
4446 "Supported modes strings are 'r', 'rb', 'rt', 'w', 'wb', and 'wt' only."
4447 )
4448
    cache_control = _scalar_property("cacheControl")
    """HTTP 'Cache-Control' header for this object.

    See [`RFC 7234`](https://tools.ietf.org/html/rfc7234#section-5.2)
    and [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``

    """

    content_disposition = _scalar_property("contentDisposition")
    """HTTP 'Content-Disposition' header for this object.

    See [`RFC 6266`](https://tools.ietf.org/html/rfc6266) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    """

    content_encoding = _scalar_property("contentEncoding")
    """HTTP 'Content-Encoding' header for this object.

    See [`RFC 7231`](https://tools.ietf.org/html/rfc7231#section-3.1.2.2) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    """

    content_language = _scalar_property("contentLanguage")
    """HTTP 'Content-Language' header for this object.

    See [`BCP47`](https://tools.ietf.org/html/bcp47) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    """

    content_type = _scalar_property(_CONTENT_TYPE_FIELD)
    """HTTP 'Content-Type' header for this object.

    See [`RFC 2616`](https://tools.ietf.org/html/rfc2616#section-14.17) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    :rtype: str or ``NoneType``
    """

    crc32c = _scalar_property("crc32c")
    """CRC32C checksum for this object.

    This returns the blob's CRC32C checksum. To retrieve the value, first use a
    reload method of the Blob class which loads the blob's properties from the server.

    See [`RFC 4960`](https://tools.ietf.org/html/rfc4960#appendix-B) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    If not set before upload, the server will compute the hash.

    :rtype: str or ``NoneType``
    """
4508
4509 def _prep_and_do_download(
4510 self,
4511 file_obj,
4512 client=None,
4513 start=None,
4514 end=None,
4515 raw_download=False,
4516 if_etag_match=None,
4517 if_etag_not_match=None,
4518 if_generation_match=None,
4519 if_generation_not_match=None,
4520 if_metageneration_match=None,
4521 if_metageneration_not_match=None,
4522 timeout=_DEFAULT_TIMEOUT,
4523 checksum="auto",
4524 retry=DEFAULT_RETRY,
4525 single_shot_download=False,
4526 command=None,
4527 ):
4528 """Download the contents of a blob object into a file-like object.
4529
4530 See https://cloud.google.com/storage/docs/downloading-objects
4531
4532 If :attr:`user_project` is set on the bucket, bills the API request
4533 to that project.
4534
4535 :type file_obj: file
4536 :param file_obj: A file handle to which to write the blob's data.
4537
4538 :type client: :class:`~google.cloud.storage.client.Client`
4539 :param client:
4540 (Optional) The client to use. If not passed, falls back to the
4541 ``client`` stored on the blob's bucket.
4542
4543 :type start: int
4544 :param start: (Optional) The first byte in a range to be downloaded.
4545
4546 :type end: int
4547 :param end: (Optional) The last byte in a range to be downloaded.
4548
4549 :type raw_download: bool
4550 :param raw_download:
4551 (Optional) If true, download the object without any expansion.
4552
4553 :type if_etag_match: Union[str, Set[str]]
4554 :param if_etag_match:
4555 (Optional) See :ref:`using-if-etag-match`
4556
4557 :type if_etag_not_match: Union[str, Set[str]]
4558 :param if_etag_not_match:
4559 (Optional) See :ref:`using-if-etag-not-match`
4560
4561 :type if_generation_match: long
4562 :param if_generation_match:
4563 (Optional) See :ref:`using-if-generation-match`
4564
4565 :type if_generation_not_match: long
4566 :param if_generation_not_match:
4567 (Optional) See :ref:`using-if-generation-not-match`
4568
4569 :type if_metageneration_match: long
4570 :param if_metageneration_match:
4571 (Optional) See :ref:`using-if-metageneration-match`
4572
4573 :type if_metageneration_not_match: long
4574 :param if_metageneration_not_match:
4575 (Optional) See :ref:`using-if-metageneration-not-match`
4576
4577 :type timeout: float or tuple
4578 :param timeout:
4579 (Optional) The amount of time, in seconds, to wait
4580 for the server response. See: :ref:`configuring_timeouts`
4581
4582 :type checksum: str
4583 :param checksum:
4584 (Optional) The type of checksum to compute to verify the integrity
4585 of the object. The response headers must contain a checksum of the
4586 requested type. If the headers lack an appropriate checksum (for
4587 instance in the case of transcoded or ranged downloads where the
4588 remote service does not know the correct checksum, including
4589 downloads where chunk_size is set) an INFO-level log will be
4590 emitted. Supported values are "md5", "crc32c", "auto" and None. The
4591 default is "auto", which will try to detect if the C extension for
4592 crc32c is installed and fall back to md5 otherwise.
4593
4594 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4595 :param retry: (Optional) How to retry the RPC. A None value will disable
4596 retries. A google.api_core.retry.Retry value will enable retries,
4597 and the object will define retriable response codes and errors and
4598 configure backoff and timeout options.
4599
4600 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
4601 Retry object and activates it only if certain conditions are met.
4602 This class exists to provide safe defaults for RPC calls that are
4603 not technically safe to retry normally (due to potential data
4604 duplication or other side-effects) but become safe to retry if a
4605 condition such as if_metageneration_match is set.
4606
4607 See the retry.py source code and docstrings in this package
4608 (google.cloud.storage.retry) for information on retry types and how
4609 to configure them.
4610
4611 :type single_shot_download: bool
4612 :param single_shot_download:
4613 (Optional) If true, download the object in a single request.
4614 Caution: Enabling this will increase the memory overload for your application.
4615 Please enable this as per your use case.
4616
4617 :type command: str
4618 :param command:
4619 (Optional) Information about which interface for download was used,
4620 to be included in the X-Goog-API-Client header. Please leave as None
4621 unless otherwise directed.
4622 """
4623 # Handle ConditionalRetryPolicy.
4624 if isinstance(retry, ConditionalRetryPolicy):
4625 # Conditional retries are designed for non-media calls, which change
4626 # arguments into query_params dictionaries. Media operations work
4627 # differently, so here we make a "fake" query_params to feed to the
4628 # ConditionalRetryPolicy.
4629 query_params = {
4630 "ifGenerationMatch": if_generation_match,
4631 "ifMetagenerationMatch": if_metageneration_match,
4632 }
4633 retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
4634
4635 client = self._require_client(client)
4636
4637 download_url = self._get_download_url(
4638 client,
4639 if_generation_match=if_generation_match,
4640 if_generation_not_match=if_generation_not_match,
4641 if_metageneration_match=if_metageneration_match,
4642 if_metageneration_not_match=if_metageneration_not_match,
4643 )
4644 headers = _get_encryption_headers(self._encryption_key)
4645 headers["accept-encoding"] = "gzip"
4646 _add_etag_match_headers(
4647 headers,
4648 if_etag_match=if_etag_match,
4649 if_etag_not_match=if_etag_not_match,
4650 )
4651 # Add any client attached custom headers to be sent with the request.
4652 headers = {
4653 **_get_default_headers(client._connection.user_agent, command=command),
4654 **headers,
4655 **client._extra_headers,
4656 }
4657
4658 transport = client._http
4659
4660 try:
4661 self._do_download(
4662 transport,
4663 file_obj,
4664 download_url,
4665 headers,
4666 start,
4667 end,
4668 raw_download,
4669 timeout=timeout,
4670 checksum=checksum,
4671 retry=retry,
4672 single_shot_download=single_shot_download,
4673 )
4674 except InvalidResponse as exc:
4675 _raise_from_invalid_response(exc)
4676
4677 @property
4678 def component_count(self):
4679 """Number of underlying components that make up this object.
4680
4681 See https://cloud.google.com/storage/docs/json_api/v1/objects
4682
4683 :rtype: int or ``NoneType``
4684 :returns: The component count (in case of a composed object) or
4685 ``None`` if the blob's resource has not been loaded from
4686 the server. This property will not be set on objects
4687 not created via ``compose``.
4688 """
4689 component_count = self._properties.get("componentCount")
4690 if component_count is not None:
4691 return int(component_count)
4692
4693 @property
4694 def etag(self):
4695 """Retrieve the ETag for the object.
4696
4697 See [`RFC 2616 (etags)`](https://tools.ietf.org/html/rfc2616#section-3.11) and
4698 [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).
4699
4700 :rtype: str or ``NoneType``
4701 :returns: The blob etag or ``None`` if the blob's resource has not
4702 been loaded from the server.
4703 """
4704 return self._properties.get("etag")
4705
    event_based_hold = _scalar_property("eventBasedHold")
    """Is an event-based hold active on the object?

    See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects)
    and the [`object holds guide`](https://cloud.google.com/storage/docs/object-holds).

    If the property is not set locally, returns :data:`None`.

    :rtype: bool or ``NoneType``
    """
4715
4716 @property
4717 def generation(self):
4718 """Retrieve the generation for the object.
4719
4720 See https://cloud.google.com/storage/docs/json_api/v1/objects
4721
4722 :rtype: int or ``NoneType``
4723 :returns: The generation of the blob or ``None`` if the blob's
4724 resource has not been loaded from the server.
4725 """
4726 generation = self._properties.get("generation")
4727 if generation is not None:
4728 return int(generation)
4729
4730 @property
4731 def id(self):
4732 """Retrieve the ID for the object.
4733
4734 See https://cloud.google.com/storage/docs/json_api/v1/objects
4735
4736 The ID consists of the bucket name, object name, and generation number.
4737
4738 :rtype: str or ``NoneType``
4739 :returns: The ID of the blob or ``None`` if the blob's
4740 resource has not been loaded from the server.
4741 """
4742 return self._properties.get("id")
4743
    md5_hash = _scalar_property("md5Hash")
    """MD5 hash for this object.

    This returns the blob's MD5 hash. The value is only available after the
    blob's properties have been loaded from the server, e.g. via a reload
    method of the Blob class.

    See [`RFC 1321`](https://tools.ietf.org/html/rfc1321) and
    [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects).

    If not set before upload, the server will compute the hash.

    :rtype: str or ``NoneType``
    """
4757
4758 @property
4759 def media_link(self):
4760 """Retrieve the media download URI for the object.
4761
4762 See https://cloud.google.com/storage/docs/json_api/v1/objects
4763
4764 :rtype: str or ``NoneType``
4765 :returns: The media link for the blob or ``None`` if the blob's
4766 resource has not been loaded from the server.
4767 """
4768 return self._properties.get("mediaLink")
4769
4770 @property
4771 def metadata(self):
4772 """Retrieve arbitrary/application specific metadata for the object.
4773
4774 See https://cloud.google.com/storage/docs/json_api/v1/objects
4775
4776 :setter: Update arbitrary/application specific metadata for the
4777 object.
4778 :getter: Retrieve arbitrary/application specific metadata for
4779 the object.
4780
4781 :rtype: dict or ``NoneType``
4782 :returns: The metadata associated with the blob or ``None`` if the
4783 property is not set.
4784 """
4785 return copy.deepcopy(self._properties.get("metadata"))
4786
4787 @metadata.setter
4788 def metadata(self, value):
4789 """Update arbitrary/application specific metadata for the object.
4790
4791 Values are stored to GCS as strings. To delete a key, set its value to
4792 None and call blob.patch().
4793
4794 See https://cloud.google.com/storage/docs/json_api/v1/objects
4795
4796 :type value: dict
4797 :param value: The blob metadata to set.
4798 """
4799 if value is not None:
4800 value = {k: str(v) if v is not None else None for k, v in value.items()}
4801 self._patch_property("metadata", value)
4802
4803 @property
4804 def metageneration(self):
4805 """Retrieve the metageneration for the object.
4806
4807 See https://cloud.google.com/storage/docs/json_api/v1/objects
4808
4809 :rtype: int or ``NoneType``
4810 :returns: The metageneration of the blob or ``None`` if the blob's
4811 resource has not been loaded from the server.
4812 """
4813 metageneration = self._properties.get("metageneration")
4814 if metageneration is not None:
4815 return int(metageneration)
4816
4817 @property
4818 def owner(self):
4819 """Retrieve info about the owner of the object.
4820
4821 See https://cloud.google.com/storage/docs/json_api/v1/objects
4822
4823 :rtype: dict or ``NoneType``
4824 :returns: Mapping of owner's role/ID, or ``None`` if the blob's
4825 resource has not been loaded from the server.
4826 """
4827 return copy.deepcopy(self._properties.get("owner"))
4828
4829 @property
4830 def retention_expiration_time(self):
4831 """Retrieve timestamp at which the object's retention period expires.
4832
4833 See https://cloud.google.com/storage/docs/json_api/v1/objects
4834
4835 :rtype: :class:`datetime.datetime` or ``NoneType``
4836 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4837 ``None`` if the property is not set locally.
4838 """
4839 value = self._properties.get("retentionExpirationTime")
4840 if value is not None:
4841 return _rfc3339_nanos_to_datetime(value)
4842
4843 @property
4844 def self_link(self):
4845 """Retrieve the URI for the object.
4846
4847 See https://cloud.google.com/storage/docs/json_api/v1/objects
4848
4849 :rtype: str or ``NoneType``
4850 :returns: The self link for the blob or ``None`` if the blob's
4851 resource has not been loaded from the server.
4852 """
4853 return self._properties.get("selfLink")
4854
4855 @property
4856 def size(self):
4857 """Size of the object, in bytes.
4858
4859 See https://cloud.google.com/storage/docs/json_api/v1/objects
4860
4861 :rtype: int or ``NoneType``
4862 :returns: The size of the blob or ``None`` if the blob's
4863 resource has not been loaded from the server.
4864 """
4865 size = self._properties.get("size")
4866 if size is not None:
4867 return int(size)
4868
4869 @property
4870 def kms_key_name(self):
4871 """Resource name of Cloud KMS key used to encrypt the blob's contents.
4872
4873 :rtype: str or ``NoneType``
4874 :returns:
4875 The resource name or ``None`` if no Cloud KMS key was used,
4876 or the blob's resource has not been loaded from the server.
4877 """
4878 return self._properties.get("kmsKeyName")
4879
4880 @kms_key_name.setter
4881 def kms_key_name(self, value):
4882 """Set KMS encryption key for object.
4883
4884 :type value: str or ``NoneType``
4885 :param value: new KMS key name (None to clear any existing key).
4886 """
4887 self._patch_property("kmsKeyName", value)
4888
    storage_class = _scalar_property("storageClass")
    """Retrieve the storage class for the object.

    This can only be set at blob / object **creation** time. If you'd
    like to change the storage class **after** the blob / object already
    exists in a bucket, call :meth:`update_storage_class` (which uses
    :meth:`rewrite`).

    See https://cloud.google.com/storage/docs/storage-classes

    :rtype: str or ``NoneType``
    :returns:
        If set, one of
        :attr:`~google.cloud.storage.constants.STANDARD_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.NEARLINE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.COLDLINE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.ARCHIVE_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.MULTI_REGIONAL_LEGACY_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.REGIONAL_LEGACY_STORAGE_CLASS`,
        :attr:`~google.cloud.storage.constants.DURABLE_REDUCED_AVAILABILITY_STORAGE_CLASS`,
        else ``None``.
    """

    temporary_hold = _scalar_property("temporaryHold")
    """Is a temporary hold active on the object?

    See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects)
    and the [`object holds guide`](https://cloud.google.com/storage/docs/object-holds).

    If the property is not set locally, returns :data:`None`.

    :rtype: bool or ``NoneType``
    """
4921
4922 @property
4923 def time_deleted(self):
4924 """Retrieve the timestamp at which the object was deleted.
4925
4926 See https://cloud.google.com/storage/docs/json_api/v1/objects
4927
4928 :rtype: :class:`datetime.datetime` or ``NoneType``
4929 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4930 ``None`` if the blob's resource has not been loaded from
4931 the server (see :meth:`reload`). If the blob has
4932 not been deleted, this will never be set.
4933 """
4934 value = self._properties.get("timeDeleted")
4935 if value is not None:
4936 return _rfc3339_nanos_to_datetime(value)
4937
4938 @property
4939 def time_created(self):
4940 """Retrieve the timestamp at which the object was created.
4941
4942 See https://cloud.google.com/storage/docs/json_api/v1/objects
4943
4944 :rtype: :class:`datetime.datetime` or ``NoneType``
4945 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4946 ``None`` if the blob's resource has not been loaded from
4947 the server (see :meth:`reload`).
4948 """
4949 value = self._properties.get("timeCreated")
4950 if value is not None:
4951 return _rfc3339_nanos_to_datetime(value)
4952
4953 @property
4954 def updated(self):
4955 """Retrieve the timestamp at which the object was updated.
4956
4957 See https://cloud.google.com/storage/docs/json_api/v1/objects
4958
4959 :rtype: :class:`datetime.datetime` or ``NoneType``
4960 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4961 ``None`` if the blob's resource has not been loaded from
4962 the server (see :meth:`reload`).
4963 """
4964 value = self._properties.get("updated")
4965 if value is not None:
4966 return _rfc3339_nanos_to_datetime(value)
4967
4968 @property
4969 def custom_time(self):
4970 """Retrieve the custom time for the object.
4971
4972 See https://cloud.google.com/storage/docs/json_api/v1/objects
4973
4974 :rtype: :class:`datetime.datetime` or ``NoneType``
4975 :returns: Datetime object parsed from RFC3339 valid timestamp, or
4976 ``None`` if the blob's resource has not been loaded from
4977 the server (see :meth:`reload`).
4978 """
4979 value = self._properties.get("customTime")
4980 if value is not None:
4981 return _rfc3339_nanos_to_datetime(value)
4982
4983 @custom_time.setter
4984 def custom_time(self, value):
4985 """Set the custom time for the object.
4986
4987 Once set on the server side object, this value can't be unset, but may
4988 only changed to a custom datetime in the future.
4989
4990 If :attr:`custom_time` must be unset, either perform a rewrite
4991 operation or upload the data again.
4992
4993 See https://cloud.google.com/storage/docs/json_api/v1/objects
4994
4995 :type value: :class:`datetime.datetime`
4996 :param value: new value
4997 """
4998 if value is not None:
4999 value = _datetime_to_rfc3339(value)
5000
5001 self._patch_property("customTime", value)
5002
5003 @property
5004 def retention(self):
5005 """Retrieve the retention configuration for this object.
5006
5007 :rtype: :class:`Retention`
5008 :returns: an instance for managing the object's retention configuration.
5009 """
5010 info = self._properties.get("retention", {})
5011 return Retention.from_api_repr(info, self)
5012
5013 @property
5014 def soft_delete_time(self):
5015 """If this object has been soft-deleted, returns the time at which it became soft-deleted.
5016
5017 :rtype: :class:`datetime.datetime` or ``NoneType``
5018 :returns:
5019 (readonly) The time that the object became soft-deleted.
5020 Note this property is only set for soft-deleted objects.
5021 """
5022 soft_delete_time = self._properties.get("softDeleteTime")
5023 if soft_delete_time is not None:
5024 return _rfc3339_nanos_to_datetime(soft_delete_time)
5025
5026 @property
5027 def hard_delete_time(self):
5028 """If this object has been soft-deleted, returns the time at which it will be permanently deleted.
5029
5030 :rtype: :class:`datetime.datetime` or ``NoneType``
5031 :returns:
5032 (readonly) The time that the object will be permanently deleted.
5033 Note this property is only set for soft-deleted objects.
5034 """
5035 hard_delete_time = self._properties.get("hardDeleteTime")
5036 if hard_delete_time is not None:
5037 return _rfc3339_nanos_to_datetime(hard_delete_time)
5038
5039
5040def _get_host_name(connection):
5041 """Returns the host name from the given connection.
5042
5043 :type connection: :class:`~google.cloud.storage._http.Connection`
5044 :param connection: The connection object.
5045
5046 :rtype: str
5047 :returns: The host name.
5048 """
5049 # TODO: After google-cloud-core 1.6.0 is stable and we upgrade it
5050 # to 1.6.0 in setup.py, we no longer need to check the attribute
5051 # existence. We can simply return connection.get_api_base_url_for_mtls().
5052 return (
5053 connection.API_BASE_URL
5054 if not hasattr(connection, "get_api_base_url_for_mtls")
5055 else connection.get_api_base_url_for_mtls()
5056 )
5057
5058
5059def _get_encryption_headers(key, source=False):
5060 """Builds customer encryption key headers
5061
5062 :type key: bytes
5063 :param key: 32 byte key to build request key and hash.
5064
5065 :type source: bool
5066 :param source: If true, return headers for the "source" blob; otherwise,
5067 return headers for the "destination" blob.
5068
5069 :rtype: dict
5070 :returns: dict of HTTP headers being sent in request.
5071 """
5072 if key is None:
5073 return {}
5074
5075 key = _to_bytes(key)
5076 key_hash = hashlib.sha256(key).digest()
5077 key_hash = base64.b64encode(key_hash)
5078 key = base64.b64encode(key)
5079
5080 if source:
5081 prefix = "X-Goog-Copy-Source-Encryption-"
5082 else:
5083 prefix = "X-Goog-Encryption-"
5084
5085 return {
5086 prefix + "Algorithm": "AES256",
5087 prefix + "Key": _bytes_to_unicode(key),
5088 prefix + "Key-Sha256": _bytes_to_unicode(key_hash),
5089 }
5090
5091
def _quote(value, safe=b"~"):
    """URL-quote a string.

    Unicode input is first UTF-8 encoded to bytes and the bytes are then
    quoted. (In Python 3, ``urllib.parse.quote`` performs this encoding
    automatically, but in Python 2, non-ASCII characters could not be
    quoted.)

    :type value: str or bytes
    :param value: The value to be URL-quoted.

    :type safe: bytes
    :param safe: Bytes *not* to be quoted. By default, includes only ``b'~'``.

    :rtype: str
    :returns: The URL-quoted value.
    """
    encoded = _to_bytes(value, encoding="utf-8")
    return quote(encoded, safe=safe)
5111
5112
5113def _maybe_rewind(stream, rewind=False):
5114 """Rewind the stream if desired.
5115
5116 :type stream: IO[bytes]
5117 :param stream: A bytes IO object open for reading.
5118
5119 :type rewind: bool
5120 :param rewind: Indicates if we should seek to the beginning of the stream.
5121 """
5122 if rewind:
5123 stream.seek(0, os.SEEK_SET)
5124
5125
def _raise_from_invalid_response(error):
    """Re-wrap and raise an ``InvalidResponse`` exception.

    :type error: :exc:`google.cloud.storage.exceptions.InvalidResponse`
    :param error: A caught exception from the ``google-resumable-media``
                  library.

    :raises: :class:`~google.cloud.exceptions.GoogleCloudError` corresponding
             to the failed status code
    """
    response = error.response

    # 'response.text' carries the server's actual reason for the failure;
    # 'error' itself only describes the expected-vs-actual status codes, so
    # prepend the former when present.
    detail = str(error)
    if response.text:
        detail = response.text + ": " + detail

    message = f"{response.request.method} {response.request.url}: {detail}"

    raise exceptions.from_http_status(response.status_code, message, response=response)
5148
5149
5150def _add_query_parameters(base_url, name_value_pairs):
5151 """Add one query parameter to a base URL.
5152
5153 :type base_url: string
5154 :param base_url: Base URL (may already contain query parameters)
5155
5156 :type name_value_pairs: list of (string, string) tuples.
5157 :param name_value_pairs: Names and values of the query parameters to add
5158
5159 :rtype: string
5160 :returns: URL with additional query strings appended.
5161 """
5162 if len(name_value_pairs) == 0:
5163 return base_url
5164
5165 scheme, netloc, path, query, frag = urlsplit(base_url)
5166 query = parse_qsl(query)
5167 query.extend(name_value_pairs)
5168 return urlunsplit((scheme, netloc, path, urlencode(query), frag))
5169
5170
class Retention(dict):
    """Map an object's retention configuration.

    :type blob: :class:`Blob`
    :param blob: blob to which this retention configuration applies.

    :type mode: str or ``NoneType``
    :param mode:
        (Optional) The mode of the retention configuration, which can be
        either Unlocked or Locked.
        See: https://cloud.google.com/storage/docs/object-lock

    :type retain_until_time: :class:`datetime.datetime` or ``NoneType``
    :param retain_until_time:
        (Optional) The earliest time that the object can be deleted or
        replaced, which is the retention configuration set for this object.

    :type retention_expiration_time: :class:`datetime.datetime` or ``NoneType``
    :param retention_expiration_time:
        (Optional) The earliest time that the object can be deleted, which
        depends on any retention configuration set for the object and any
        retention policy set for the bucket that contains the object. This
        value should normally only be set by the back-end API.
    """

    def __init__(
        self,
        blob,
        mode=None,
        retain_until_time=None,
        retention_expiration_time=None,
    ):
        # Datetimes are stored in the mapping as RFC3339 strings, matching
        # the JSON API wire representation.
        data = {"mode": mode}

        if retain_until_time is not None:
            data["retainUntilTime"] = _datetime_to_rfc3339(retain_until_time)

        if retention_expiration_time is not None:
            data["retentionExpirationTime"] = _datetime_to_rfc3339(
                retention_expiration_time
            )

        super(Retention, self).__init__(data)
        self._blob = blob

    @classmethod
    def from_api_repr(cls, resource, blob):
        """Factory: construct instance from resource.

        :type blob: :class:`Blob`
        :param blob: Blob to which this retention configuration applies.

        :type resource: dict
        :param resource: mapping as returned from API call.

        :rtype: :class:`Retention`
        :returns: Retention configuration created from resource.
        """
        config = cls(blob)
        config.update(resource)
        return config

    @property
    def blob(self):
        """Blob to which this retention configuration applies.

        :rtype: :class:`Blob`
        :returns: the instance's blob.
        """
        return self._blob

    @property
    def mode(self):
        """The mode of the retention configuration. Options are 'Unlocked' or 'Locked'.

        :rtype: string
        :returns: The mode of the retention configuration, which can be
            either set to 'Unlocked' or 'Locked'.
        """
        return self.get("mode")

    @mode.setter
    def mode(self, value):
        self["mode"] = value
        self.blob._patch_property("retention", self)

    @property
    def retain_until_time(self):
        """The earliest time that the object can be deleted or replaced, which is the
        retention configuration set for this object.

        :rtype: :class:`datetime.datetime` or ``NoneType``
        :returns: Datetime object parsed from RFC3339 valid timestamp, or
            ``None`` if the blob's resource has not been loaded from
            the server (see :meth:`reload`).
        """
        raw_timestamp = self.get("retainUntilTime")
        if raw_timestamp is None:
            return None
        return _rfc3339_nanos_to_datetime(raw_timestamp)

    @retain_until_time.setter
    def retain_until_time(self, value):
        """Set the retain_until_time for the object retention configuration.

        :type value: :class:`datetime.datetime`
        :param value: The earliest time that the object can be deleted or
            replaced.
        """
        self["retainUntilTime"] = (
            None if value is None else _datetime_to_rfc3339(value)
        )
        self.blob._patch_property("retention", self)

    @property
    def retention_expiration_time(self):
        """The earliest time that the object can be deleted, which depends on any
        retention configuration set for the object and any retention policy set for
        the bucket that contains the object.

        :rtype: :class:`datetime.datetime` or ``NoneType``
        :returns:
            (readonly) The earliest time that the object can be deleted.
        """
        raw_timestamp = self.get("retentionExpirationTime")
        if raw_timestamp is None:
            return None
        return _rfc3339_nanos_to_datetime(raw_timestamp)