1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Client for interacting with the Google Cloud Storage API."""
16
17import base64
18import binascii
19import collections
20import datetime
21import functools
22import json
23import os
24import warnings
25import google.api_core.client_options
26
27from google.auth.credentials import AnonymousCredentials
28from google.auth.transport import mtls
29from google.api_core import page_iterator
30from google.cloud._helpers import _LocalStack
31from google.cloud.client import ClientWithProject
32from google.cloud.exceptions import NotFound
33
34from google.cloud.storage._helpers import _add_generation_match_parameters
35from google.cloud.storage._helpers import _bucket_bound_hostname_url
36from google.cloud.storage._helpers import _get_api_endpoint_override
37from google.cloud.storage._helpers import _get_environ_project
38from google.cloud.storage._helpers import _get_storage_emulator_override
39from google.cloud.storage._helpers import _virtual_hosted_style_base_url
40from google.cloud.storage._helpers import _DEFAULT_UNIVERSE_DOMAIN
41from google.cloud.storage._helpers import _DEFAULT_SCHEME
42from google.cloud.storage._helpers import _STORAGE_HOST_TEMPLATE
43from google.cloud.storage._helpers import _NOW
44from google.cloud.storage._helpers import _UTC
45from google.cloud.storage._opentelemetry_tracing import create_trace_span
46
47from google.cloud.storage._http import Connection
48from google.cloud.storage._signing import (
49 get_expiration_seconds_v4,
50 get_v4_now_dtstamps,
51 ensure_signed_credentials,
52 _sign_message,
53)
54from google.cloud.storage.batch import Batch
55from google.cloud.storage.bucket import Bucket, _item_to_blob, _blobs_page_start
56from google.cloud.storage.blob import Blob
57from google.cloud.storage.hmac_key import HMACKeyMetadata
58from google.cloud.storage.acl import BucketACL
59from google.cloud.storage.acl import DefaultObjectACL
60from google.cloud.storage.constants import _DEFAULT_TIMEOUT
61from google.cloud.storage.retry import DEFAULT_RETRY
62
63
64_marker = object()
65
66
67def _buckets_page_start(iterator, page, response):
    """Grab unreachable buckets after a :class:`~google.api_core.page_iterator.Page` is started."""
69 unreachable = response.get("unreachable", [])
70 if not isinstance(unreachable, list):
        raise TypeError(
            f"expected 'unreachable' to be a list, but got {type(unreachable)}"
        )
74 page.unreachable = unreachable
75
76
77class Client(ClientWithProject):
78 """Client to bundle configuration needed for API requests.
79
80 :type project: str or None
    :param project: the project which the client acts on behalf of. Will be
                    passed when creating a bucket. If not passed,
                    falls back to the default inferred from the environment.
84
85 :type credentials: :class:`~google.auth.credentials.Credentials`
86 :param credentials: (Optional) The OAuth2 Credentials to use for this
87 client. If not passed (and if no ``_http`` object is
88 passed), falls back to the default inferred from the
89 environment.
90
91 :type _http: :class:`~requests.Session`
92 :param _http: (Optional) HTTP object to make requests. Can be any object
93 that defines ``request()`` with the same interface as
94 :meth:`requests.Session.request`. If not passed, an
95 ``_http`` object is created that is bound to the
96 ``credentials`` for the current object.
97 This parameter should be considered private, and could
98 change in the future.
99
100 :type client_info: :class:`~google.api_core.client_info.ClientInfo`
101 :param client_info:
102 The client info used to send a user-agent string along with API
103 requests. If ``None``, then default info will be used. Generally,
104 you only need to set this if you're developing your own library
105 or partner tool.
106
107 :type client_options: :class:`~google.api_core.client_options.ClientOptions` or :class:`dict`
108 :param client_options: (Optional) Client options used to set user options on the client.
        A non-default universe domain or API endpoint should be set through client_options.
110
111 :type use_auth_w_custom_endpoint: bool
112 :param use_auth_w_custom_endpoint:
        (Optional) Whether authentication is required under custom endpoints.
        If False, uses AnonymousCredentials and bypasses authentication.
        Defaults to True. Note that this option only takes effect when a custom endpoint is also set.
116
117 :type extra_headers: dict
118 :param extra_headers:
119 (Optional) Custom headers to be sent with the requests attached to the client.
120 For example, you can add custom audit logging headers.
121
122 :type api_key: string
123 :param api_key:
124 (Optional) An API key. Mutually exclusive with any other credentials.
        This parameter is an alias for setting `client_options.api_key` and
        will supersede any API key set in the `client_options` parameter.
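
    Example:
        A minimal, illustrative construction sketch. The project ID and the
        emulator endpoint below are placeholders, not defaults::

            from google.cloud import storage

            client = storage.Client(project="my-project")

            # Hypothetical local emulator endpoint, with auth bypassed.
            emulator_client = storage.Client(
                project="my-project",
                client_options={"api_endpoint": "http://localhost:9023"},
                use_auth_w_custom_endpoint=False,
            )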
127 """
128
129 SCOPE = (
130 "https://www.googleapis.com/auth/devstorage.full_control",
131 "https://www.googleapis.com/auth/devstorage.read_only",
132 "https://www.googleapis.com/auth/devstorage.read_write",
133 )
134 """The scopes required for authenticating as a Cloud Storage consumer."""
135
136 def __init__(
137 self,
138 project=_marker,
139 credentials=None,
140 _http=None,
141 client_info=None,
142 client_options=None,
143 use_auth_w_custom_endpoint=True,
144 extra_headers={},
145 *,
146 api_key=None,
147 ):
148 self._base_connection = None
149
150 if project is None:
151 no_project = True
152 project = "<none>"
153 else:
154 no_project = False
155
156 if project is _marker:
157 project = None
158
159 # Save the initial value of constructor arguments before they
160 # are passed along, for use in __reduce__ defined elsewhere.
161 self._initial_client_info = client_info
162 self._initial_client_options = client_options
163 self._extra_headers = extra_headers
164
165 connection_kw_args = {"client_info": client_info}
166
167 # api_key should set client_options.api_key. Set it here whether
168 # client_options was specified as a dict, as a ClientOptions object, or
169 # None.
170 if api_key:
171 if client_options and not isinstance(client_options, dict):
172 client_options.api_key = api_key
173 else:
174 if not client_options:
175 client_options = {}
176 client_options["api_key"] = api_key
177
178 if client_options:
179 if isinstance(client_options, dict):
180 client_options = google.api_core.client_options.from_dict(
181 client_options
182 )
183
184 if client_options and client_options.universe_domain:
185 self._universe_domain = client_options.universe_domain
186 else:
187 self._universe_domain = None
188
189 storage_emulator_override = _get_storage_emulator_override()
190 api_endpoint_override = _get_api_endpoint_override()
191
        # Determine the API endpoint. The rules are as follows:
193
194 # 1. If the `api_endpoint` is set in `client_options`, use that as the
195 # endpoint.
196 if client_options and client_options.api_endpoint:
197 api_endpoint = client_options.api_endpoint
198
199 # 2. Elif the "STORAGE_EMULATOR_HOST" env var is set, then use that as the
200 # endpoint.
201 elif storage_emulator_override:
202 api_endpoint = storage_emulator_override
203
204 # 3. Elif the "API_ENDPOINT_OVERRIDE" env var is set, then use that as the
205 # endpoint.
206 elif api_endpoint_override:
207 api_endpoint = api_endpoint_override
208
209 # 4. Elif the `universe_domain` is set in `client_options`,
210 # create the endpoint using that as the default.
211 #
212 # Mutual TLS is not compatible with a non-default universe domain
213 # at this time. If such settings are enabled along with the
214 # "GOOGLE_API_USE_CLIENT_CERTIFICATE" env variable, a ValueError will
215 # be raised.
216
217 elif self._universe_domain:
218 # The final decision of whether to use mTLS takes place in
219 # google-auth-library-python. We peek at the environment variable
220 # here only to issue an exception in case of a conflict.
221 use_client_cert = False
222 if hasattr(mtls, "should_use_client_cert"):
223 use_client_cert = mtls.should_use_client_cert()
224 else:
225 use_client_cert = (
226 os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true"
227 )
228
229 if use_client_cert:
230 raise ValueError(
231 'The "GOOGLE_API_USE_CLIENT_CERTIFICATE" env variable is '
232 'set to "true" and a non-default universe domain is '
                    "configured. mTLS is not supported in any universe other "
                    "than googleapis.com."
235 )
236 api_endpoint = _DEFAULT_SCHEME + _STORAGE_HOST_TEMPLATE.format(
237 universe_domain=self._universe_domain
238 )
239
240 # 5. Else, use the default, which is to use the default
241 # universe domain of "googleapis.com" and create the endpoint
242 # "storage.googleapis.com" from that.
243 else:
244 api_endpoint = None
245
246 connection_kw_args["api_endpoint"] = api_endpoint
247
        self._is_emulator_set = bool(storage_emulator_override)
249
250 # If a custom endpoint is set, the client checks for credentials
251 # or finds the default credentials based on the current environment.
252 # Authentication may be bypassed under certain conditions:
253 # (1) STORAGE_EMULATOR_HOST is set (for backwards compatibility), OR
254 # (2) use_auth_w_custom_endpoint is set to False.
255 if connection_kw_args["api_endpoint"] is not None:
256 if self._is_emulator_set or not use_auth_w_custom_endpoint:
257 if credentials is None:
258 credentials = AnonymousCredentials()
259 if project is None:
260 project = _get_environ_project()
261 if project is None:
262 no_project = True
263 project = "<none>"
264
265 super(Client, self).__init__(
266 project=project,
267 credentials=credentials,
268 client_options=client_options,
269 _http=_http,
270 )
271
272 # Validate that the universe domain of the credentials matches the
273 # universe domain of the client.
274 if self._credentials.universe_domain != self.universe_domain:
275 raise ValueError(
276 "The configured universe domain ({client_ud}) does not match "
277 "the universe domain found in the credentials ({cred_ud}). If "
278 "you haven't configured the universe domain explicitly, "
279 "`googleapis.com` is the default.".format(
280 client_ud=self.universe_domain,
281 cred_ud=self._credentials.universe_domain,
282 )
283 )
284
285 if no_project:
286 self.project = None
287
288 # Pass extra_headers to Connection
289 connection = Connection(self, **connection_kw_args)
290 connection.extra_headers = extra_headers
291 self._connection = connection
292 self._batch_stack = _LocalStack()
293
294 @classmethod
295 def create_anonymous_client(cls):
296 """Factory: return client with anonymous credentials.
297
298 .. note::
299
300 Such a client has only limited access to "public" buckets:
301 listing their contents and downloading their blobs.
302
303 :rtype: :class:`google.cloud.storage.client.Client`
304 :returns: Instance w/ anonymous credentials and no project.
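
        Example:
            Illustrative sketch; the bucket name is a placeholder for any
            publicly readable bucket::

                client = Client.create_anonymous_client()
                bucket = client.bucket("some-public-bucket")
                blob_names = [blob.name for blob in client.list_blobs(bucket)]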
305 """
306 client = cls(project="<none>", credentials=AnonymousCredentials())
307 client.project = None
308 return client
309
310 @property
311 def universe_domain(self):
312 return self._universe_domain or _DEFAULT_UNIVERSE_DOMAIN
313
314 @property
315 def api_endpoint(self):
316 return self._connection.API_BASE_URL
317
318 def update_user_agent(self, user_agent):
319 """Update the user-agent string for this client.
320
321 :type user_agent: str
322 :param user_agent: The string to add to the user-agent.
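
        Example:
            Illustrative only; the product token is a placeholder::

                client.update_user_agent("my-tool/1.2.3")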
323 """
324 existing_user_agent = self._connection._client_info.user_agent
325 if existing_user_agent is None:
326 self._connection.user_agent = user_agent
327 else:
328 self._connection.user_agent = f"{user_agent} {existing_user_agent}"
329
330 @property
331 def _connection(self):
332 """Get connection or batch on the client.
333
334 :rtype: :class:`google.cloud.storage._http.Connection`
335 :returns: The connection set on the client, or the batch
336 if one is set.
337 """
338 if self.current_batch is not None:
339 return self.current_batch
340 else:
341 return self._base_connection
342
343 @_connection.setter
344 def _connection(self, value):
345 """Set connection on the client.
346
        Intended to be used by the constructor (since the base class calls
        ``self._connection = connection``).
        Will raise if the connection is set more than once.
350
351 :type value: :class:`google.cloud.storage._http.Connection`
352 :param value: The connection set on the client.
353
354 :raises: :class:`ValueError` if connection has already been set.
355 """
356 if self._base_connection is not None:
357 raise ValueError("Connection already set on client")
358 self._base_connection = value
359
360 def _push_batch(self, batch):
361 """Push a batch onto our stack.
362
363 "Protected", intended for use by batch context mgrs.
364
365 :type batch: :class:`google.cloud.storage.batch.Batch`
366 :param batch: newly-active batch
367 """
368 self._batch_stack.push(batch)
369
370 def _pop_batch(self):
371 """Pop a batch from our stack.
372
373 "Protected", intended for use by batch context mgrs.
374
375 :raises: IndexError if the stack is empty.
376 :rtype: :class:`google.cloud.storage.batch.Batch`
377 :returns: the top-most batch/transaction, after removing it.
378 """
379 return self._batch_stack.pop()
380
381 @property
382 def current_batch(self):
383 """Currently-active batch.
384
385 :rtype: :class:`google.cloud.storage.batch.Batch` or ``NoneType`` (if
386 no batch is active).
387 :returns: The batch at the top of the batch stack.
388 """
389 return self._batch_stack.top
390
391 def get_service_account_email(
392 self, project=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY
393 ):
394 """Get the email address of the project's GCS service account
395
396 :type project: str
397 :param project:
            (Optional) Project ID to use for retrieving GCS service account
399 email address. Defaults to the client's project.
400 :type timeout: float or tuple
401 :param timeout:
402 (Optional) The amount of time, in seconds, to wait
403 for the server response. See: :ref:`configuring_timeouts`
404
405 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
406 :param retry:
407 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
408
409 :rtype: str
410 :returns: service account email address
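
        Example:
            Illustrative usage; the project defaults to the client's project::

                email = client.get_service_account_email()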
411 """
412 with create_trace_span(name="Storage.Client.getServiceAccountEmail"):
413 if project is None:
414 project = self.project
415
416 path = f"/projects/{project}/serviceAccount"
417 api_response = self._get_resource(path, timeout=timeout, retry=retry)
418 return api_response["email_address"]
419
420 def bucket(self, bucket_name, user_project=None, generation=None):
421 """Factory constructor for bucket object.
422
423 .. note::
424 This will not make an HTTP request; it simply instantiates
425 a bucket object owned by this client.
426
427 :type bucket_name: str
428 :param bucket_name: The name of the bucket to be instantiated.
429
430 :type user_project: str
431 :param user_project: (Optional) The project ID to be billed for API
432 requests made via the bucket.
433
434 :type generation: int
435 :param generation: (Optional) If present, selects a specific revision of
436 this bucket.
437
438 :rtype: :class:`google.cloud.storage.bucket.Bucket`
439 :returns: The bucket object created.
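
        Example:
            Illustrative only; no request is made, and the names are placeholders::

                bucket = client.bucket("my-bucket")
                blob = bucket.blob("path/to/object.txt")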
440 """
441 return Bucket(
442 client=self,
443 name=bucket_name,
444 user_project=user_project,
445 generation=generation,
446 )
447
448 def batch(self, raise_exception=True):
449 """Factory constructor for batch object.
450
451 .. note::
452 This will not make an HTTP request; it simply instantiates
453 a batch object owned by this client.
454
455 :type raise_exception: bool
456 :param raise_exception:
            (Optional) Defaults to True. If True, instead of adding exceptions
            to the list of returned responses, the final exception will be raised.
459 Note that exceptions are unwrapped after all operations are complete
460 in success or failure, and only the last exception is raised.
461
462 :rtype: :class:`google.cloud.storage.batch.Batch`
463 :returns: The batch object created.
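
        Example:
            A hedged sketch of batching several deletions into one request;
            the bucket and object names are placeholders::

                bucket = client.bucket("my-bucket")
                with client.batch():
                    for name in ["a.txt", "b.txt"]:
                        bucket.delete_blob(name)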
464 """
465 return Batch(client=self, raise_exception=raise_exception)
466
467 def _get_resource(
468 self,
469 path,
470 query_params=None,
471 headers=None,
472 timeout=_DEFAULT_TIMEOUT,
473 retry=DEFAULT_RETRY,
474 _target_object=None,
475 ):
476 """Helper for bucket / blob methods making API 'GET' calls.
477
478 Args:
479 path str:
480 The path of the resource to fetch.
481
482 query_params Optional[dict]:
483 HTTP query parameters to be passed
484
485 headers Optional[dict]:
486 HTTP headers to be passed
487
488 timeout (Optional[Union[float, Tuple[float, float]]]):
489 The amount of time, in seconds, to wait for the server response.
490
491 Can also be passed as a tuple (connect_timeout, read_timeout).
492 See :meth:`requests.Session.request` documentation for details.
493
494 retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]):
495 How to retry the RPC. A None value will disable retries.
496 A google.api_core.retry.Retry value will enable retries, and the object will
497 define retriable response codes and errors and configure backoff and timeout options.
498
499 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
500 activates it only if certain conditions are met. This class exists to provide safe defaults
501 for RPC calls that are not technically safe to retry normally (due to potential data
502 duplication or other side-effects) but become safe to retry if a condition such as
503 if_metageneration_match is set.
504
505 See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
506 information on retry types and how to configure them.
507
508 _target_object (Union[ \
509 :class:`~google.cloud.storage.bucket.Bucket`, \
                :class:`~google.cloud.storage.blob.Blob`, \
511 ]):
512 Object to which future data is to be applied -- only relevant
513 in the context of a batch.
514
515 Returns:
516 dict
517 The JSON resource fetched
518
519 Raises:
520 google.cloud.exceptions.NotFound
521 If the bucket is not found.
522 """
523 return self._connection.api_request(
524 method="GET",
525 path=path,
526 query_params=query_params,
527 headers=headers,
528 timeout=timeout,
529 retry=retry,
530 _target_object=_target_object,
531 )
532
533 def _list_resource(
534 self,
535 path,
536 item_to_value,
537 page_token=None,
538 max_results=None,
539 extra_params=None,
540 page_start=page_iterator._do_nothing_page_start,
541 page_size=None,
542 timeout=_DEFAULT_TIMEOUT,
543 retry=DEFAULT_RETRY,
544 ):
545 kwargs = {
546 "method": "GET",
547 "path": path,
548 "timeout": timeout,
549 }
550 with create_trace_span(
551 name="Storage.Client._list_resource_returns_iterator",
552 client=self,
553 api_request=kwargs,
554 retry=retry,
555 ):
556 api_request = functools.partial(
557 self._connection.api_request, timeout=timeout, retry=retry
558 )
559 return page_iterator.HTTPIterator(
560 client=self,
561 api_request=api_request,
562 path=path,
563 item_to_value=item_to_value,
564 page_token=page_token,
565 max_results=max_results,
566 extra_params=extra_params,
567 page_start=page_start,
568 page_size=page_size,
569 )
570
571 def _patch_resource(
572 self,
573 path,
574 data,
575 query_params=None,
576 headers=None,
577 timeout=_DEFAULT_TIMEOUT,
578 retry=None,
579 _target_object=None,
580 ):
581 """Helper for bucket / blob methods making API 'PATCH' calls.
582
583 Args:
584 path str:
                The path of the resource to patch.
586
587 data dict:
588 The data to be patched.
589
590 query_params Optional[dict]:
591 HTTP query parameters to be passed
592
593 headers Optional[dict]:
594 HTTP headers to be passed
595
596 timeout (Optional[Union[float, Tuple[float, float]]]):
597 The amount of time, in seconds, to wait for the server response.
598
599 Can also be passed as a tuple (connect_timeout, read_timeout).
600 See :meth:`requests.Session.request` documentation for details.
601
602 retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]):
603 How to retry the RPC. A None value will disable retries.
604 A google.api_core.retry.Retry value will enable retries, and the object will
605 define retriable response codes and errors and configure backoff and timeout options.
606
607 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
608 activates it only if certain conditions are met. This class exists to provide safe defaults
609 for RPC calls that are not technically safe to retry normally (due to potential data
610 duplication or other side-effects) but become safe to retry if a condition such as
611 if_metageneration_match is set.
612
613 See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
614 information on retry types and how to configure them.
615
616 _target_object (Union[ \
617 :class:`~google.cloud.storage.bucket.Bucket`, \
                :class:`~google.cloud.storage.blob.Blob`, \
619 ]):
620 Object to which future data is to be applied -- only relevant
621 in the context of a batch.
622
623 Returns:
624 dict
625 The JSON resource fetched
626
627 Raises:
628 google.cloud.exceptions.NotFound
629 If the bucket is not found.
630 """
631 return self._connection.api_request(
632 method="PATCH",
633 path=path,
634 data=data,
635 query_params=query_params,
636 headers=headers,
637 timeout=timeout,
638 retry=retry,
639 _target_object=_target_object,
640 )
641
642 def _put_resource(
643 self,
644 path,
645 data,
646 query_params=None,
647 headers=None,
648 timeout=_DEFAULT_TIMEOUT,
649 retry=None,
650 _target_object=None,
651 ):
652 """Helper for bucket / blob methods making API 'PUT' calls.
653
654 Args:
655 path str:
                The path of the resource to update.
657
658 data dict:
                The data to be stored.
660
661 query_params Optional[dict]:
662 HTTP query parameters to be passed
663
664 headers Optional[dict]:
665 HTTP headers to be passed
666
667 timeout (Optional[Union[float, Tuple[float, float]]]):
668 The amount of time, in seconds, to wait for the server response.
669
670 Can also be passed as a tuple (connect_timeout, read_timeout).
671 See :meth:`requests.Session.request` documentation for details.
672
673 retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]):
674 How to retry the RPC. A None value will disable retries.
675 A google.api_core.retry.Retry value will enable retries, and the object will
676 define retriable response codes and errors and configure backoff and timeout options.
677
678 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
679 activates it only if certain conditions are met. This class exists to provide safe defaults
680 for RPC calls that are not technically safe to retry normally (due to potential data
681 duplication or other side-effects) but become safe to retry if a condition such as
682 if_metageneration_match is set.
683
684 See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
685 information on retry types and how to configure them.
686
687 _target_object (Union[ \
688 :class:`~google.cloud.storage.bucket.Bucket`, \
                :class:`~google.cloud.storage.blob.Blob`, \
690 ]):
691 Object to which future data is to be applied -- only relevant
692 in the context of a batch.
693
694 Returns:
695 dict
696 The JSON resource fetched
697
698 Raises:
699 google.cloud.exceptions.NotFound
700 If the bucket is not found.
701 """
702 return self._connection.api_request(
703 method="PUT",
704 path=path,
705 data=data,
706 query_params=query_params,
707 headers=headers,
708 timeout=timeout,
709 retry=retry,
710 _target_object=_target_object,
711 )
712
713 def _post_resource(
714 self,
715 path,
716 data,
717 query_params=None,
718 headers=None,
719 timeout=_DEFAULT_TIMEOUT,
720 retry=None,
721 _target_object=None,
722 ):
723 """Helper for bucket / blob methods making API 'POST' calls.
724
725 Args:
726 path str:
727 The path of the resource to which to post.
728
729 data dict:
730 The data to be posted.
731
732 query_params Optional[dict]:
733 HTTP query parameters to be passed
734
735 headers Optional[dict]:
736 HTTP headers to be passed
737
738 timeout (Optional[Union[float, Tuple[float, float]]]):
739 The amount of time, in seconds, to wait for the server response.
740
741 Can also be passed as a tuple (connect_timeout, read_timeout).
742 See :meth:`requests.Session.request` documentation for details.
743
744 retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]):
745 How to retry the RPC. A None value will disable retries.
746 A google.api_core.retry.Retry value will enable retries, and the object will
747 define retriable response codes and errors and configure backoff and timeout options.
748
749 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
750 activates it only if certain conditions are met. This class exists to provide safe defaults
751 for RPC calls that are not technically safe to retry normally (due to potential data
752 duplication or other side-effects) but become safe to retry if a condition such as
753 if_metageneration_match is set.
754
755 See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
756 information on retry types and how to configure them.
757
758 _target_object (Union[ \
759 :class:`~google.cloud.storage.bucket.Bucket`, \
                :class:`~google.cloud.storage.blob.Blob`, \
761 ]):
762 Object to which future data is to be applied -- only relevant
763 in the context of a batch.
764
765 Returns:
766 dict
767 The JSON resource returned from the post.
768
769 Raises:
770 google.cloud.exceptions.NotFound
771 If the bucket is not found.
772 """
773
774 return self._connection.api_request(
775 method="POST",
776 path=path,
777 data=data,
778 query_params=query_params,
779 headers=headers,
780 timeout=timeout,
781 retry=retry,
782 _target_object=_target_object,
783 )
784
785 def _delete_resource(
786 self,
787 path,
788 query_params=None,
789 headers=None,
790 timeout=_DEFAULT_TIMEOUT,
791 retry=DEFAULT_RETRY,
792 _target_object=None,
793 ):
794 """Helper for bucket / blob methods making API 'DELETE' calls.
795
796 Args:
797 path str:
798 The path of the resource to delete.
799
800 query_params Optional[dict]:
801 HTTP query parameters to be passed
802
803 headers Optional[dict]:
804 HTTP headers to be passed
805
806 timeout (Optional[Union[float, Tuple[float, float]]]):
807 The amount of time, in seconds, to wait for the server response.
808
809 Can also be passed as a tuple (connect_timeout, read_timeout).
810 See :meth:`requests.Session.request` documentation for details.
811
812 retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]):
813 How to retry the RPC. A None value will disable retries.
814 A google.api_core.retry.Retry value will enable retries, and the object will
815 define retriable response codes and errors and configure backoff and timeout options.
816
817 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
818 activates it only if certain conditions are met. This class exists to provide safe defaults
819 for RPC calls that are not technically safe to retry normally (due to potential data
820 duplication or other side-effects) but become safe to retry if a condition such as
821 if_metageneration_match is set.
822
823 See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
824 information on retry types and how to configure them.
825
826 _target_object (Union[ \
827 :class:`~google.cloud.storage.bucket.Bucket`, \
                :class:`~google.cloud.storage.blob.Blob`, \
829 ]):
830 Object to which future data is to be applied -- only relevant
831 in the context of a batch.
832
833 Returns:
834 dict
835 The JSON resource fetched
836
837 Raises:
838 google.cloud.exceptions.NotFound
839 If the bucket is not found.
840 """
841 return self._connection.api_request(
842 method="DELETE",
843 path=path,
844 query_params=query_params,
845 headers=headers,
846 timeout=timeout,
847 retry=retry,
848 _target_object=_target_object,
849 )
850
851 def _bucket_arg_to_bucket(self, bucket_or_name, generation=None):
        """Helper to return the given bucket or construct a new one by name.
853
854 Args:
855 bucket_or_name (Union[ \
856 :class:`~google.cloud.storage.bucket.Bucket`, \
857 str, \
858 ]):
859 The bucket resource to pass or name to create.
860 generation (Optional[int]):
861 The bucket generation. If generation is specified,
862 bucket_or_name must be a name (str).
863
864 Returns:
865 google.cloud.storage.bucket.Bucket
866 The newly created bucket or the given one.
867 """
868 if isinstance(bucket_or_name, Bucket):
869 if generation:
870 raise ValueError(
871 "The generation can only be specified if a "
872 "name is used to specify a bucket, not a Bucket object. "
873 "Create a new Bucket object with the correct generation "
874 "instead."
875 )
876 bucket = bucket_or_name
877 if bucket.client is None:
878 bucket._client = self
879 else:
880 bucket = Bucket(self, name=bucket_or_name, generation=generation)
881 return bucket
882
883 def get_bucket(
884 self,
885 bucket_or_name,
886 timeout=_DEFAULT_TIMEOUT,
887 if_metageneration_match=None,
888 if_metageneration_not_match=None,
889 retry=DEFAULT_RETRY,
890 *,
891 generation=None,
892 soft_deleted=None,
893 ):
894 """Retrieve a bucket via a GET request.
895
896 See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/buckets/get) and a [code sample](https://cloud.google.com/storage/docs/samples/storage-get-bucket-metadata#storage_get_bucket_metadata-python).
897
898 Args:
899 bucket_or_name (Union[ \
900 :class:`~google.cloud.storage.bucket.Bucket`, \
901 str, \
902 ]):
903 The bucket resource to pass or name to create.
904
905 timeout (Optional[Union[float, Tuple[float, float]]]):
906 The amount of time, in seconds, to wait for the server response.
907
908 Can also be passed as a tuple (connect_timeout, read_timeout).
909 See :meth:`requests.Session.request` documentation for details.
910
911 if_metageneration_match (Optional[int]):
912 Make the operation conditional on whether the
913 bucket's current metageneration matches the given value.
914
915 if_metageneration_not_match (Optional[int]):
916 Make the operation conditional on whether the bucket's
917 current metageneration does not match the given value.
918
919 retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]):
920 How to retry the RPC. A None value will disable retries.
921 A google.api_core.retry.Retry value will enable retries, and the object will
922 define retriable response codes and errors and configure backoff and timeout options.
923
924 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
925 activates it only if certain conditions are met. This class exists to provide safe defaults
926 for RPC calls that are not technically safe to retry normally (due to potential data
927 duplication or other side-effects) but become safe to retry if a condition such as
928 if_metageneration_match is set.
929
930 See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
931 information on retry types and how to configure them.
932
933 generation (Optional[int]):
934 The generation of the bucket. The generation can be used to
935 specify a specific soft-deleted version of the bucket, in
936 conjunction with the ``soft_deleted`` argument below. If
937 ``soft_deleted`` is not True, the generation is unused.
938
939 soft_deleted (Optional[bool]):
940 If True, looks for a soft-deleted bucket. Will only return
941 the bucket metadata if the bucket exists and is in a
942 soft-deleted state. The bucket ``generation`` is required if
943 ``soft_deleted`` is set to True.
944 See: https://cloud.google.com/storage/docs/soft-delete
945
946 Returns:
947 google.cloud.storage.bucket.Bucket
948 The bucket matching the name provided.
949
950 Raises:
951 google.cloud.exceptions.NotFound
952 If the bucket is not found.
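
        Example:
            A hedged sketch; the bucket name is a placeholder, and
            :class:`~google.cloud.exceptions.NotFound` is raised if the bucket
            does not exist::

                bucket = client.get_bucket("my-bucket")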
953 """
954 with create_trace_span(name="Storage.Client.getBucket"):
955 bucket = self._bucket_arg_to_bucket(bucket_or_name, generation=generation)
956 bucket.reload(
957 client=self,
958 timeout=timeout,
959 if_metageneration_match=if_metageneration_match,
960 if_metageneration_not_match=if_metageneration_not_match,
961 retry=retry,
962 soft_deleted=soft_deleted,
963 )
964 return bucket
965
966 def lookup_bucket(
967 self,
968 bucket_name,
969 timeout=_DEFAULT_TIMEOUT,
970 if_metageneration_match=None,
971 if_metageneration_not_match=None,
972 retry=DEFAULT_RETRY,
973 ):
974 """Get a bucket by name, returning None if not found.
975
976 You can use this if you would rather check for a None value
977 than catching a NotFound exception.
978
979 :type bucket_name: str
980 :param bucket_name: The name of the bucket to get.
981
982 :type timeout: float or tuple
983 :param timeout:
984 (Optional) The amount of time, in seconds, to wait
985 for the server response. See: :ref:`configuring_timeouts`
986
        :type if_metageneration_match: long
        :param if_metageneration_match: (Optional) Make the operation conditional on whether the
                                        bucket's current metageneration matches the given value.

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match: (Optional) Make the operation conditional on whether the
                                            bucket's current metageneration does not match the given value.
994
995 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
996 :param retry:
997 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
998
999 :rtype: :class:`google.cloud.storage.bucket.Bucket` or ``NoneType``
1000 :returns: The bucket matching the name provided or None if not found.
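
        Example:
            Illustrative only; the bucket name is a placeholder::

                bucket = client.lookup_bucket("maybe-existing-bucket")
                if bucket is None:
                    print("bucket not found")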
1001 """
1002 with create_trace_span(name="Storage.Client.lookupBucket"):
1003 try:
1004 return self.get_bucket(
1005 bucket_name,
1006 timeout=timeout,
1007 if_metageneration_match=if_metageneration_match,
1008 if_metageneration_not_match=if_metageneration_not_match,
1009 retry=retry,
1010 )
1011 except NotFound:
1012 return None
1013
1014 def create_bucket(
1015 self,
1016 bucket_or_name,
1017 requester_pays=None,
1018 project=None,
1019 user_project=None,
1020 location=None,
1021 data_locations=None,
1022 predefined_acl=None,
1023 predefined_default_object_acl=None,
1024 enable_object_retention=False,
1025 timeout=_DEFAULT_TIMEOUT,
1026 retry=DEFAULT_RETRY,
1027 ):
1028 """Create a new bucket via a POST request.
1029
1030 See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/buckets/insert) and a [code sample](https://cloud.google.com/storage/docs/samples/storage-create-bucket#storage_create_bucket-python).
1031
1032 Args:
1033 bucket_or_name (Union[ \
1034 :class:`~google.cloud.storage.bucket.Bucket`, \
1035 str, \
1036 ]):
1037 The bucket resource to pass or name to create.
1038 requester_pays (bool):
1039 DEPRECATED. Use Bucket().requester_pays instead.
1040 (Optional) Whether requester pays for API requests for
1041 this bucket and its blobs.
1042 project (str):
1043 (Optional) The project under which the bucket is to be created.
1044 If not passed, uses the project set on the client.
1045 user_project (str):
1046 (Optional) The project ID to be billed for API requests
1047 made via created bucket.
1048 location (str):
1049 (Optional) The location of the bucket. If not passed,
1050 the default location, US, will be used. If specifying a dual-region,
1051 `data_locations` should be set in conjunction. See:
1052 https://cloud.google.com/storage/docs/locations
1053 data_locations (list of str):
1054 (Optional) The list of regional locations of a custom dual-region bucket.
1055 Dual-regions require exactly 2 regional locations. See:
1056 https://cloud.google.com/storage/docs/locations
1057 predefined_acl (str):
1058 (Optional) Name of predefined ACL to apply to bucket. See:
1059 https://cloud.google.com/storage/docs/access-control/lists#predefined-acl
1060 predefined_default_object_acl (str):
1061 (Optional) Name of predefined ACL to apply to bucket's objects. See:
1062 https://cloud.google.com/storage/docs/access-control/lists#predefined-acl
1063 enable_object_retention (bool):
1064 (Optional) Whether object retention should be enabled on this bucket. See:
1065 https://cloud.google.com/storage/docs/object-lock
1066 timeout (Optional[Union[float, Tuple[float, float]]]):
1067 The amount of time, in seconds, to wait for the server response.
1068
1069 Can also be passed as a tuple (connect_timeout, read_timeout).
1070 See :meth:`requests.Session.request` documentation for details.
1071
1072 retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]):
1073 How to retry the RPC. A None value will disable retries.
1074 A google.api_core.retry.Retry value will enable retries, and the object will
1075 define retriable response codes and errors and configure backoff and timeout options.
1076
1077 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
1078 activates it only if certain conditions are met. This class exists to provide safe defaults
1079 for RPC calls that are not technically safe to retry normally (due to potential data
1080 duplication or other side-effects) but become safe to retry if a condition such as
1081 if_metageneration_match is set.
1082
1083 See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
1084 information on retry types and how to configure them.
1085
1086 Returns:
1087 google.cloud.storage.bucket.Bucket
1088 The newly created bucket.
1089
1090 Raises:
1091 google.cloud.exceptions.Conflict
1092 If the bucket already exists.
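
        Example:
            A hedged sketch; the bucket name and location are placeholders::

                bucket = client.create_bucket("my-new-bucket", location="US-EAST1")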
1093 """
1094 with create_trace_span(name="Storage.Client.createBucket"):
1095 bucket = self._bucket_arg_to_bucket(bucket_or_name)
1096 query_params = {}
1097
1098 if project is None:
1099 project = self.project
1100
1101 # Use no project if STORAGE_EMULATOR_HOST is set
1102 if self._is_emulator_set:
1103 if project is None:
1104 project = _get_environ_project()
1105 if project is None:
1106 project = "<none>"
1107
1108 # Only include the project parameter if a project is set.
1109 # If a project is not set, falls back to API validation (BadRequest).
1110 if project is not None:
1111 query_params = {"project": project}
1112
1113 if requester_pays is not None:
1114 warnings.warn(
1115 "requester_pays arg is deprecated. Use Bucket().requester_pays instead.",
1116 PendingDeprecationWarning,
1117 stacklevel=1,
1118 )
1119 bucket.requester_pays = requester_pays
1120
1121 if predefined_acl is not None:
1122 predefined_acl = BucketACL.validate_predefined(predefined_acl)
1123 query_params["predefinedAcl"] = predefined_acl
1124
1125 if predefined_default_object_acl is not None:
1126 predefined_default_object_acl = DefaultObjectACL.validate_predefined(
1127 predefined_default_object_acl
1128 )
1129 query_params[
1130 "predefinedDefaultObjectAcl"
1131 ] = predefined_default_object_acl
1132
1133 if user_project is not None:
1134 query_params["userProject"] = user_project
1135
1136 if enable_object_retention:
1137 query_params["enableObjectRetention"] = enable_object_retention
1138
1139 properties = {key: bucket._properties[key] for key in bucket._changes}
1140 properties["name"] = bucket.name
1141
1142 if location is not None:
1143 properties["location"] = location
1144
1145 if data_locations is not None:
1146 properties["customPlacementConfig"] = {"dataLocations": data_locations}
1147
1148 api_response = self._post_resource(
1149 "/b",
1150 properties,
1151 query_params=query_params,
1152 timeout=timeout,
1153 retry=retry,
1154 _target_object=bucket,
1155 )
1156
1157 bucket._set_properties(api_response)
1158 return bucket
1159
1160 def download_blob_to_file(
1161 self,
1162 blob_or_uri,
1163 file_obj,
1164 start=None,
1165 end=None,
1166 raw_download=False,
1167 if_etag_match=None,
1168 if_etag_not_match=None,
1169 if_generation_match=None,
1170 if_generation_not_match=None,
1171 if_metageneration_match=None,
1172 if_metageneration_not_match=None,
1173 timeout=_DEFAULT_TIMEOUT,
1174 checksum="auto",
1175 retry=DEFAULT_RETRY,
1176 single_shot_download=False,
1177 ):
1178 """Download the contents of a blob object or blob URI into a file-like object.
1179
1180 See https://cloud.google.com/storage/docs/downloading-objects
1181
1182 Args:
1183 blob_or_uri (Union[ \
1184 :class:`~google.cloud.storage.blob.Blob`, \
1185 str, \
1186 ]):
1187 The blob resource to pass or URI to download.
1188
1189 file_obj (file):
1190 A file handle to which to write the blob's data.
1191
1192 start (int):
1193 (Optional) The first byte in a range to be downloaded.
1194
1195 end (int):
1196 (Optional) The last byte in a range to be downloaded.
1197
1198 raw_download (bool):
1199 (Optional) If true, download the object without any expansion.
1200
1201 if_etag_match (Union[str, Set[str]]):
1202 (Optional) See :ref:`using-if-etag-match`
1203
1204 if_etag_not_match (Union[str, Set[str]]):
1205 (Optional) See :ref:`using-if-etag-not-match`
1206
1207 if_generation_match (long):
1208 (Optional) See :ref:`using-if-generation-match`
1209
1210 if_generation_not_match (long):
1211 (Optional) See :ref:`using-if-generation-not-match`
1212
1213 if_metageneration_match (long):
1214 (Optional) See :ref:`using-if-metageneration-match`
1215
1216 if_metageneration_not_match (long):
1217 (Optional) See :ref:`using-if-metageneration-not-match`
1218
1219 timeout ([Union[float, Tuple[float, float]]]):
1220 (Optional) The amount of time, in seconds, to wait
1221 for the server response. See: :ref:`configuring_timeouts`
1222
1223 checksum (str):
1224 (Optional) The type of checksum to compute to verify the integrity
1225 of the object. The response headers must contain a checksum of the
1226 requested type. If the headers lack an appropriate checksum (for
1227 instance in the case of transcoded or ranged downloads where the
1228 remote service does not know the correct checksum, including
1229 downloads where chunk_size is set) an INFO-level log will be
1230 emitted. Supported values are "md5", "crc32c", "auto" and None.
1231 The default is "auto", which will try to detect if the C
1232 extension for crc32c is installed and fall back to md5 otherwise.
1233
1234 retry (google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy)
1235 (Optional) How to retry the RPC. A None value will disable
1236 retries. A google.api_core.retry.Retry value will enable retries,
1237 and the object will define retriable response codes and errors and
1238 configure backoff and timeout options.
1239
1240 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
1241 Retry object and activates it only if certain conditions are met.
1242 This class exists to provide safe defaults for RPC calls that are
1243 not technically safe to retry normally (due to potential data
1244 duplication or other side-effects) but become safe to retry if a
1245 condition such as if_metageneration_match is set.
1246
1247 See the retry.py source code and docstrings in this package
1248 (google.cloud.storage.retry) for information on retry types and how
1249 to configure them.
1250
1251 single_shot_download (bool):
1252 (Optional) If true, download the object in a single request.
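
        Example:
            Illustrative only; the URI and local path are placeholders::

                with open("/tmp/object.txt", "wb") as file_obj:
                    client.download_blob_to_file("gs://my-bucket/object.txt", file_obj)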
1253 """
1254 with create_trace_span(name="Storage.Client.downloadBlobToFile"):
1255 if not isinstance(blob_or_uri, Blob):
1256 blob_or_uri = Blob.from_uri(blob_or_uri)
1257
1258 blob_or_uri._prep_and_do_download(
1259 file_obj,
1260 client=self,
1261 start=start,
1262 end=end,
1263 raw_download=raw_download,
1264 if_etag_match=if_etag_match,
1265 if_etag_not_match=if_etag_not_match,
1266 if_generation_match=if_generation_match,
1267 if_generation_not_match=if_generation_not_match,
1268 if_metageneration_match=if_metageneration_match,
1269 if_metageneration_not_match=if_metageneration_not_match,
1270 timeout=timeout,
1271 checksum=checksum,
1272 retry=retry,
1273 single_shot_download=single_shot_download,
1274 )
1275
1276 def list_blobs(
1277 self,
1278 bucket_or_name,
1279 max_results=None,
1280 page_token=None,
1281 prefix=None,
1282 delimiter=None,
1283 start_offset=None,
1284 end_offset=None,
1285 include_trailing_delimiter=None,
1286 versions=None,
1287 projection="noAcl",
1288 fields=None,
1289 page_size=None,
1290 timeout=_DEFAULT_TIMEOUT,
1291 retry=DEFAULT_RETRY,
1292 match_glob=None,
1293 include_folders_as_prefixes=None,
1294 soft_deleted=None,
1295 ):
1296 """Return an iterator used to find blobs in the bucket.
1297
1298 If :attr:`user_project` is set, bills the API request to that project.
1299
1300 .. note::
1301 List prefixes (directories) in a bucket using a prefix and delimiter.
1302 See a [code sample](https://cloud.google.com/storage/docs/samples/storage-list-files-with-prefix#storage_list_files_with_prefix-python)
1303 listing objects using a prefix filter.
1304
1305 Args:
1306 bucket_or_name (Union[ \
1307 :class:`~google.cloud.storage.bucket.Bucket`, \
1308 str, \
1309 ]):
1310 The bucket resource to pass or name to create.
1311
1312 max_results (int):
1313 (Optional) The maximum number of blobs to return.
1314
1315 page_token (str):
1316 (Optional) If present, return the next batch of blobs, using the
1317 value, which must correspond to the ``nextPageToken`` value
1318 returned in the previous response. Deprecated: use the ``pages``
1319 property of the returned iterator instead of manually passing the
1320 token.
1321
1322 prefix (str):
1323 (Optional) Prefix used to filter blobs.
1324
1325 delimiter (str):
1326 (Optional) Delimiter, used with ``prefix`` to
1327 emulate hierarchy.
1328
1329 start_offset (str):
1330 (Optional) Filter results to objects whose names are
1331 lexicographically equal to or after ``startOffset``. If
1332 ``endOffset`` is also set, the objects listed will have names
1333 between ``startOffset`` (inclusive) and ``endOffset``
1334 (exclusive).
1335
1336 end_offset (str):
1337 (Optional) Filter results to objects whose names are
1338 lexicographically before ``endOffset``. If ``startOffset`` is
1339 also set, the objects listed will have names between
1340 ``startOffset`` (inclusive) and ``endOffset`` (exclusive).
1341
1342 include_trailing_delimiter (boolean):
1343 (Optional) If true, objects that end in exactly one instance of
1344 ``delimiter`` will have their metadata included in ``items`` in
1345 addition to ``prefixes``.
1346
1347 versions (bool):
1348 (Optional) Whether object versions should be returned
1349 as separate blobs.
1350
1351 projection (str):
1352 (Optional) If used, must be 'full' or 'noAcl'.
1353 Defaults to ``'noAcl'``. Specifies the set of
1354 properties to return.
1355
1356 fields (str):
1357 (Optional) Selector specifying which fields to include
1358 in a partial response. Must be a list of fields. For
1359 example to get a partial response with just the next
1360 page token and the name and language of each blob returned:
1361 ``'items(name,contentLanguage),nextPageToken'``.
1362 See: https://cloud.google.com/storage/docs/json_api/v1/parameters#fields
1363
1364 page_size (int):
1365 (Optional) Maximum number of blobs to return in each page.
1366 Defaults to a value set by the API.
1367
1368 timeout (Optional[Union[float, Tuple[float, float]]]):
1369 The amount of time, in seconds, to wait for the server response.
1370
1371 Can also be passed as a tuple (connect_timeout, read_timeout).
1372 See :meth:`requests.Session.request` documentation for details.
1373
1374 retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]):
1375 How to retry the RPC. A None value will disable retries.
1376 A google.api_core.retry.Retry value will enable retries, and the object will
1377 define retriable response codes and errors and configure backoff and timeout options.
1378
1379 A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
1380 activates it only if certain conditions are met. This class exists to provide safe defaults
1381 for RPC calls that are not technically safe to retry normally (due to potential data
1382 duplication or other side-effects) but become safe to retry if a condition such as
1383 if_metageneration_match is set.
1384
1385 See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
1386 information on retry types and how to configure them.
1387
1388 match_glob (str):
1389 (Optional) A glob pattern used to filter results (for example, foo*bar).
1390 The string value must be UTF-8 encoded. See:
1391 https://cloud.google.com/storage/docs/json_api/v1/objects/list#list-object-glob
1392
1393 include_folders_as_prefixes (bool):
1394 (Optional) If true, includes Folders and Managed Folders in the set of
1395 ``prefixes`` returned by the query. Only applicable if ``delimiter`` is set to /.
1396 See: https://cloud.google.com/storage/docs/managed-folders
1397
1398 soft_deleted (bool):
1399 (Optional) If true, only soft-deleted objects will be listed as distinct results in order of increasing
1400 generation number. This parameter can only be used successfully if the bucket has a soft delete policy.
1401 Note ``soft_deleted`` and ``versions`` cannot be set to True simultaneously. See:
1402 https://cloud.google.com/storage/docs/soft-delete
1403
1404 Returns:
1405 Iterator of all :class:`~google.cloud.storage.blob.Blob`
1406 in this bucket matching the arguments. The RPC call
1407 returns a response when the iterator is consumed.
1408
            As part of the response, you'll also get back an ``iterator.prefixes`` set containing object name
            prefixes up to and including the requested delimiter. Duplicate entries are omitted from this set.
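
        Example:
            A hedged sketch listing objects under a prefix; the bucket name and
            prefix are placeholders::

                for blob in client.list_blobs("my-bucket", prefix="logs/", delimiter="/"):
                    print(blob.name)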
1411 """
1412 with create_trace_span(name="Storage.Client.listBlobs"):
1413 bucket = self._bucket_arg_to_bucket(bucket_or_name)
1414
1415 extra_params = {"projection": projection}
1416
1417 if prefix is not None:
1418 extra_params["prefix"] = prefix
1419
1420 if delimiter is not None:
1421 extra_params["delimiter"] = delimiter
1422
1423 if match_glob is not None:
1424 extra_params["matchGlob"] = match_glob
1425
1426 if start_offset is not None:
1427 extra_params["startOffset"] = start_offset
1428
1429 if end_offset is not None:
1430 extra_params["endOffset"] = end_offset
1431
1432 if include_trailing_delimiter is not None:
1433 extra_params["includeTrailingDelimiter"] = include_trailing_delimiter
1434
1435 if versions is not None:
1436 extra_params["versions"] = versions
1437
1438 if fields is not None:
1439 extra_params["fields"] = fields
1440
1441 if include_folders_as_prefixes is not None:
1442 extra_params["includeFoldersAsPrefixes"] = include_folders_as_prefixes
1443
1444 if soft_deleted is not None:
1445 extra_params["softDeleted"] = soft_deleted
1446
1447 if bucket.user_project is not None:
1448 extra_params["userProject"] = bucket.user_project
1449
1450 path = bucket.path + "/o"
1451 iterator = self._list_resource(
1452 path,
1453 _item_to_blob,
1454 page_token=page_token,
1455 max_results=max_results,
1456 extra_params=extra_params,
1457 page_start=_blobs_page_start,
1458 page_size=page_size,
1459 timeout=timeout,
1460 retry=retry,
1461 )
1462 iterator.bucket = bucket
1463 iterator.prefixes = set()
1464 return iterator
1465
1466 def list_buckets(
1467 self,
1468 max_results=None,
1469 page_token=None,
1470 prefix=None,
1471 projection="noAcl",
1472 fields=None,
1473 project=None,
1474 page_size=None,
1475 timeout=_DEFAULT_TIMEOUT,
1476 retry=DEFAULT_RETRY,
1477 *,
1478 soft_deleted=None,
1479 return_partial_success=None,
1480 ):
1481 """Get all buckets in the project associated to the client.
1482
1483 This will not populate the list of blobs available in each
1484 bucket.
1485
1486 See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/buckets/list) and a [code sample](https://cloud.google.com/storage/docs/samples/storage-list-buckets#storage_list_buckets-python).
1487
1488 :type max_results: int
1489 :param max_results: (Optional) The maximum number of buckets to return.
1490
1491 :type page_token: str
1492 :param page_token:
1493 (Optional) If present, return the next batch of buckets, using the
1494 value, which must correspond to the ``nextPageToken`` value
1495 returned in the previous response. Deprecated: use the ``pages``
1496 property of the returned iterator instead of manually passing the
1497 token.
1498
1499 :type prefix: str
1500 :param prefix: (Optional) Filter results to buckets whose names begin
1501 with this prefix.
1502
1503 :type projection: str
1504 :param projection:
1505 (Optional) Specifies the set of properties to return. If used, must
1506 be 'full' or 'noAcl'. Defaults to 'noAcl'.
1507
1508 :type fields: str
1509 :param fields:
            (Optional) Selector specifying which fields to include in a partial
            response. Must be a list of fields. For example, to get a partial
            response with just the next page token and the ID of each
            bucket returned: 'items/id,nextPageToken'
1514
1515 :type project: str
1516 :param project: (Optional) The project whose buckets are to be listed.
1517 If not passed, uses the project set on the client.
1518
1519 :type page_size: int
1520 :param page_size: (Optional) Maximum number of buckets to return in each page.
1521 Defaults to a value set by the API.
1522
1523 :type timeout: float or tuple
1524 :param timeout:
1525 (Optional) The amount of time, in seconds, to wait
1526 for the server response. See: :ref:`configuring_timeouts`
1527
1528 :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
1529 :param retry:
1530 (Optional) How to retry the RPC. See: :ref:`configuring_retries`
1531
1532 :type soft_deleted: bool
1533 :param soft_deleted:
1534 (Optional) If true, only soft-deleted buckets will be listed as distinct results in order of increasing
1535 generation number. This parameter can only be used successfully if the bucket has a soft delete policy.
1536 See: https://cloud.google.com/storage/docs/soft-delete
1537
1538 :type return_partial_success: bool
1539 :param return_partial_success:
1540 (Optional) If True, the response will also contain a list of
1541 unreachable buckets if the buckets are unavailable. The
1542 unreachable buckets will be available on the ``unreachable``
1543 attribute of the returned iterator.
1544
1545 :rtype: :class:`~google.api_core.page_iterator.Iterator`
1546 :raises ValueError: if both ``project`` is ``None`` and the client's
1547 project is also ``None``.
1548 :returns: Iterator of all :class:`~google.cloud.storage.bucket.Bucket`
1549 belonging to this project.
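
        Example:
            Illustrative usage; the prefix is a placeholder::

                for bucket in client.list_buckets(prefix="logs-"):
                    print(bucket.name)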
1550 """
1551 with create_trace_span(name="Storage.Client.listBuckets"):
1552 extra_params = {}
1553
1554 if project is None:
1555 project = self.project
1556
1557 # Use no project if STORAGE_EMULATOR_HOST is set
1558 if self._is_emulator_set:
1559 if project is None:
1560 project = _get_environ_project()
1561 if project is None:
1562 project = "<none>"
1563
1564 # Only include the project parameter if a project is set.
1565 # If a project is not set, falls back to API validation (BadRequest).
1566 if project is not None:
1567 extra_params = {"project": project}
1568
1569 if prefix is not None:
1570 extra_params["prefix"] = prefix
1571
1572 extra_params["projection"] = projection
1573
1574 if fields is not None:
1575 extra_params["fields"] = fields
1576
1577 if soft_deleted is not None:
1578 extra_params["softDeleted"] = soft_deleted
1579
1580 if return_partial_success is not None:
1581 extra_params["returnPartialSuccess"] = return_partial_success
1582
1583 iterator = self._list_resource(
1584 "/b",
1585 _item_to_bucket,
1586 page_token=page_token,
1587 max_results=max_results,
1588 extra_params=extra_params,
1589 page_size=page_size,
1590 timeout=timeout,
1591 retry=retry,
1592 page_start=_buckets_page_start,
1593 )
1594 return iterator

    def restore_bucket(
        self,
        bucket_name,
        generation,
        projection="noAcl",
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
    ):
        """Restores a soft-deleted bucket.

        :type bucket_name: str
        :param bucket_name: The name of the bucket to be restored.

        :type generation: int
        :param generation: Selects the specific revision of the bucket.

        :type projection: str
        :param projection:
            (Optional) Specifies the set of properties to return. If used, must
            be 'full' or 'noAcl'. Defaults to 'noAcl'.

        :type if_metageneration_match: int
        :param if_metageneration_match:
            (Optional) Make the operation conditional on whether the
            bucket's current metageneration matches the given value.

        :type if_metageneration_not_match: int
        :param if_metageneration_not_match:
            (Optional) Make the operation conditional on whether the bucket's
            current metageneration does not match the given value.

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response. See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC.

            Users can configure non-default retry behavior. A ``None`` value will
            disable retries. See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).

        :rtype: :class:`google.cloud.storage.bucket.Bucket`
        :returns: The restored Bucket.
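
        Example:
            A minimal usage sketch (the bucket name and generation number are
            placeholders; a real generation can be found by listing buckets
            with ``soft_deleted=True``)::

                bucket = client.restore_bucket("my-bucket", generation=123456789)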
        """
        query_params = {"generation": generation, "projection": projection}

        _add_generation_match_parameters(
            query_params,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )

        bucket = self.bucket(bucket_name)
        api_response = self._post_resource(
            f"{bucket.path}/restore",
            None,
            query_params=query_params,
            timeout=timeout,
            retry=retry,
        )
        bucket._set_properties(api_response)
        return bucket

    def create_hmac_key(
        self,
        service_account_email,
        project_id=None,
        user_project=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=None,
    ):
        """Create an HMAC key for a service account.

        :type service_account_email: str
        :param service_account_email: e-mail address of the service account

        :type project_id: str
        :param project_id: (Optional) Explicit project ID for the key.
            Defaults to the client's project.

        :type user_project: str
        :param user_project: (Optional) This parameter is currently ignored.

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response. See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry: (Optional) How to retry the RPC. A None value will disable retries.
            A google.api_core.retry.Retry value will enable retries, and the object will
            define retriable response codes and errors and configure backoff and timeout options.

            A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and
            activates it only if certain conditions are met. This class exists to provide safe defaults
            for RPC calls that are not technically safe to retry normally (due to potential data
            duplication or other side-effects) but become safe to retry if a condition such as
            if_metageneration_match is set.

            See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for
            information on retry types and how to configure them.

        :rtype:
            Tuple[:class:`~google.cloud.storage.hmac_key.HMACKeyMetadata`, str]
        :returns: metadata for the created key, plus the key's secret, a
            40-character base64-encoded string.
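
        Example:
            A minimal usage sketch (the service account e-mail is a
            placeholder)::

                metadata, secret = client.create_hmac_key(
                    "my-service-account@my-project.iam.gserviceaccount.com"
                )
                # The secret is only returned at creation time; store it now.
                print(metadata.access_id, secret)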
        """
        with create_trace_span(name="Storage.Client.createHmacKey"):
            if project_id is None:
                project_id = self.project

            path = f"/projects/{project_id}/hmacKeys"
            qs_params = {"serviceAccountEmail": service_account_email}

            if user_project is not None:
                qs_params["userProject"] = user_project

            api_response = self._post_resource(
                path,
                None,
                query_params=qs_params,
                timeout=timeout,
                retry=retry,
            )
            metadata = HMACKeyMetadata(self)
            metadata._properties = api_response["metadata"]
            secret = api_response["secret"]
            return metadata, secret

    def list_hmac_keys(
        self,
        max_results=None,
        service_account_email=None,
        show_deleted_keys=None,
        project_id=None,
        user_project=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
    ):
        """List HMAC keys for a project.

        :type max_results: int
        :param max_results:
            (Optional) Max number of keys to return in a given page.

        :type service_account_email: str
        :param service_account_email:
            (Optional) Limit keys to those created by the given service account.

        :type show_deleted_keys: bool
        :param show_deleted_keys:
            (Optional) Include deleted keys in the list. Default is to
            exclude them.

        :type project_id: str
        :param project_id: (Optional) Explicit project ID for the key.
            Defaults to the client's project.

        :type user_project: str
        :param user_project: (Optional) This parameter is currently ignored.

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response. See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :rtype: :class:`~google.api_core.page_iterator.Iterator`
        :returns: Iterator of :class:`~google.cloud.storage.hmac_key.HMACKeyMetadata`
            instances for the project.
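
        Example:
            A minimal usage sketch (``client`` is assumed to be an
            already-constructed :class:`Client`)::

                for metadata in client.list_hmac_keys():
                    print(metadata.access_id, metadata.state)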
        """
        with create_trace_span(name="Storage.Client.listHmacKeys"):
            if project_id is None:
                project_id = self.project

            path = f"/projects/{project_id}/hmacKeys"
            extra_params = {}

            if service_account_email is not None:
                extra_params["serviceAccountEmail"] = service_account_email

            if show_deleted_keys is not None:
                extra_params["showDeletedKeys"] = show_deleted_keys

            if user_project is not None:
                extra_params["userProject"] = user_project

            return self._list_resource(
                path,
                _item_to_hmac_key_metadata,
                max_results=max_results,
                extra_params=extra_params,
                timeout=timeout,
                retry=retry,
            )

    def get_hmac_key_metadata(
        self, access_id, project_id=None, user_project=None, timeout=_DEFAULT_TIMEOUT
    ):
        """Return a metadata instance for the given HMAC key.

        :type access_id: str
        :param access_id: Unique ID of an existing key.

        :type project_id: str
        :param project_id: (Optional) Project ID of an existing key.
            Defaults to client's project.

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response. See: :ref:`configuring_timeouts`

        :type user_project: str
        :param user_project: (Optional) This parameter is currently ignored.
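
        :rtype: :class:`~google.cloud.storage.hmac_key.HMACKeyMetadata`
        :returns: Metadata for the key with the given access ID.
        :raises: :class:`~google.cloud.exceptions.NotFound` if no such key exists.

        Example:
            A minimal usage sketch (the access ID is a placeholder)::

                metadata = client.get_hmac_key_metadata("GOOG1EXAMPLEACCESSID")
                print(metadata.service_account_email, metadata.state)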
        """
        with create_trace_span(name="Storage.Client.getHmacKeyMetadata"):
            metadata = HMACKeyMetadata(self, access_id, project_id, user_project)
            metadata.reload(timeout=timeout)  # raises NotFound for missing key
            return metadata

    def generate_signed_post_policy_v4(
        self,
        bucket_name,
        blob_name,
        expiration,
        conditions=None,
        fields=None,
        credentials=None,
        virtual_hosted_style=False,
        bucket_bound_hostname=None,
        scheme="http",
        service_account_email=None,
        access_token=None,
    ):
        """Generate a V4 signed policy object. The generated policy allows a
        user to upload objects with a POST request.

        .. note::

            Assumes ``credentials`` implements the
            :class:`google.auth.credentials.Signing` interface. Also assumes
            ``credentials`` has a ``service_account_email`` property which
            identifies the credentials.

        See a [code sample](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_generate_signed_post_policy_v4.py).

        :type bucket_name: str
        :param bucket_name: Bucket name.

        :type blob_name: str
        :param blob_name: Object name.

        :type expiration: Union[Integer, datetime.datetime, datetime.timedelta]
        :param expiration: Policy expiration time. If a ``datetime`` instance is
            passed without an explicit ``tzinfo`` set, it will be
            assumed to be ``UTC``.

        :type conditions: list
        :param conditions: (Optional) List of POST policy conditions, which are
            used to restrict what is allowed in the request.

        :type fields: dict
        :param fields: (Optional) Additional elements to include in the request.

        :type credentials: :class:`google.auth.credentials.Signing`
        :param credentials: (Optional) Credentials object with an associated private
            key to sign text.

        :type virtual_hosted_style: bool
        :param virtual_hosted_style:
            (Optional) If True, construct the URL relative to the bucket
            virtual hostname, e.g., '<bucket-name>.storage.googleapis.com'.
            Incompatible with bucket_bound_hostname.

        :type bucket_bound_hostname: str
        :param bucket_bound_hostname:
            (Optional) If passed, construct the URL relative to the bucket-bound hostname.
            Value can be bare or with a scheme, e.g., 'example.com' or 'http://example.com'.
            Incompatible with virtual_hosted_style.
            See: https://cloud.google.com/storage/docs/request-endpoints#cname

        :type scheme: str
        :param scheme:
            (Optional) If ``bucket_bound_hostname`` is passed as a bare hostname, use
            this value as a scheme. ``https`` will work only when using a CDN.
            Defaults to ``"http"``.

        :type service_account_email: str
        :param service_account_email: (Optional) E-mail address of the service account.

        :type access_token: str
        :param access_token: (Optional) Access token for a service account.

        :raises: :exc:`ValueError` when mutually exclusive arguments are used.

        :rtype: dict
        :returns: Signed POST policy.
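
        Example:
            A minimal sketch of using the returned policy with the third-party
            ``requests`` library to upload a local file (bucket, object, and
            file names are placeholders)::

                import datetime
                import requests

                policy = client.generate_signed_post_policy_v4(
                    "my-bucket",
                    "my-object",
                    expiration=datetime.timedelta(minutes=10),
                )
                with open("local-file.txt", "rb") as f:
                    files = {"file": ("local-file.txt", f)}
                    response = requests.post(
                        policy["url"], data=policy["fields"], files=files
                    )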
        """
        if virtual_hosted_style and bucket_bound_hostname:
            raise ValueError(
                "Only one of virtual_hosted_style and bucket_bound_hostname "
                "can be specified."
            )

        credentials = self._credentials if credentials is None else credentials
        client_email = service_account_email
        if not access_token or not service_account_email:
            ensure_signed_credentials(credentials)
            client_email = credentials.signer_email

        # prepare policy conditions and fields
        timestamp, datestamp = get_v4_now_dtstamps()

        x_goog_credential = "{email}/{datestamp}/auto/storage/goog4_request".format(
            email=client_email, datestamp=datestamp
        )
        required_conditions = [
            {"bucket": bucket_name},
            {"key": blob_name},
            {"x-goog-date": timestamp},
            {"x-goog-credential": x_goog_credential},
            {"x-goog-algorithm": "GOOG4-RSA-SHA256"},
        ]

        conditions = conditions or []
        policy_fields = {}
        for key, value in sorted((fields or {}).items()):
            if not key.startswith("x-ignore-"):
                policy_fields[key] = value
                conditions.append({key: value})

        conditions += required_conditions

        # calculate policy expiration time
        now = _NOW(_UTC).replace(tzinfo=None)
        if expiration is None:
            expiration = now + datetime.timedelta(hours=1)

        policy_expires = now + datetime.timedelta(
            seconds=get_expiration_seconds_v4(expiration)
        )

        # encode policy for signing
        policy = json.dumps(
            collections.OrderedDict(
                sorted(
                    {
                        "conditions": conditions,
                        "expiration": policy_expires.isoformat() + "Z",
                    }.items()
                )
            ),
            separators=(",", ":"),
        )
        str_to_sign = base64.b64encode(policy.encode("utf-8"))

        # sign the policy and get its cryptographic signature
        if access_token and service_account_email:
            signature = _sign_message(str_to_sign, access_token, service_account_email)
            signature_bytes = base64.b64decode(signature)
        else:
            signature_bytes = credentials.sign_bytes(str_to_sign)

        # get hexadecimal representation of the signature
        signature = binascii.hexlify(signature_bytes).decode("utf-8")

        policy_fields.update(
            {
                "key": blob_name,
                "x-goog-algorithm": "GOOG4-RSA-SHA256",
                "x-goog-credential": x_goog_credential,
                "x-goog-date": timestamp,
                "x-goog-signature": signature,
                "policy": str_to_sign.decode("utf-8"),
            }
        )
        # designate URL
        if virtual_hosted_style:
            url = _virtual_hosted_style_base_url(
                self.api_endpoint, bucket_name, trailing_slash=True
            )
        elif bucket_bound_hostname:
            url = f"{_bucket_bound_hostname_url(bucket_bound_hostname, scheme)}/"
        else:
            url = f"{self.api_endpoint}/{bucket_name}/"

        return {"url": url, "fields": policy_fields}


def _item_to_bucket(iterator, item):
    """Convert a JSON bucket to the native object.

    :type iterator: :class:`~google.api_core.page_iterator.Iterator`
    :param iterator: The iterator that has retrieved the item.

    :type item: dict
    :param item: An item to be converted to a bucket.

    :rtype: :class:`.Bucket`
    :returns: The next bucket in the page.
    """
    name = item.get("name")
    bucket = Bucket(iterator.client, name)
    bucket._set_properties(item)
    return bucket


def _item_to_hmac_key_metadata(iterator, item):
    """Convert a JSON key metadata resource to the native object.

    :type iterator: :class:`~google.api_core.page_iterator.Iterator`
    :param iterator: The iterator that has retrieved the item.

    :type item: dict
    :param item: An item to be converted to a key metadata instance.

    :rtype: :class:`~google.cloud.storage.hmac_key.HMACKeyMetadata`
    :returns: The next key metadata instance in the page.
    """
    metadata = HMACKeyMetadata(iterator.client)
    metadata._properties = item
    return metadata