1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Helper functions for Cloud Storage utility classes.
16
17These are *not* part of the API.
18"""
19
20import base64
21import datetime
22from hashlib import md5
23import os
24import sys
25from urllib.parse import urlsplit
26from urllib.parse import urlunsplit
27from uuid import uuid4
28
29from google.auth import environment_vars
30from google.cloud.storage.constants import _DEFAULT_TIMEOUT
31from google.cloud.storage.retry import DEFAULT_RETRY
32from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
33
34
# Name of the environment variable that points the client at a storage
# emulator instead of the production endpoint.
STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST"  # Despite name, includes scheme.
"""Environment variable defining host for Storage emulator."""

# Name of the environment variable that replaces the default base URL.
_API_ENDPOINT_OVERRIDE_ENV_VAR = "API_ENDPOINT_OVERRIDE"  # Includes scheme.
"""This is an experimental configuration variable. Use api_endpoint instead."""

# Name of the environment variable that replaces the default API version.
_API_VERSION_OVERRIDE_ENV_VAR = "API_VERSION_OVERRIDE"
"""This is an experimental configuration variable used for internal testing."""

_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com"

# Host name pattern; ``universe_domain`` is filled in with ``str.format``.
_STORAGE_HOST_TEMPLATE = "storage.{universe_domain}"

# The template rendered with the default universe domain, i.e.
# "storage.googleapis.com".
_TRUE_DEFAULT_STORAGE_HOST = _STORAGE_HOST_TEMPLATE.format(
    universe_domain=_DEFAULT_UNIVERSE_DOMAIN
)

# Already contains the "://" separator, so it is concatenated directly with
# a host name to form a base URL.
_DEFAULT_SCHEME = "https://"

# Resolved once, when this module is imported: the override environment
# variable if it is set, otherwise "v1".
_API_VERSION = os.getenv(_API_VERSION_OVERRIDE_ENV_VAR, "v1")
"""API version of the default storage host"""

# ETag preconditions as (snake_case keyword name, HTTP header name) pairs.
_ETAG_MATCH_PARAMETERS = (
    ("if_etag_match", "If-Match"),
    ("if_etag_not_match", "If-None-Match"),
)

# Generation preconditions as (snake_case keyword name, camelCase request
# parameter name) pairs.
_GENERATION_MATCH_PARAMETERS = (
    ("if_generation_match", "ifGenerationMatch"),
    ("if_generation_not_match", "ifGenerationNotMatch"),
    ("if_metageneration_match", "ifMetagenerationMatch"),
    ("if_metageneration_not_match", "ifMetagenerationNotMatch"),
    ("if_source_generation_match", "ifSourceGenerationMatch"),
    ("if_source_generation_not_match", "ifSourceGenerationNotMatch"),
    ("if_source_metageneration_match", "ifSourceMetagenerationMatch"),
    ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"),
)

# _NOW() returns the current local date and time as a naive datetime.
# Prefer the timezone-aware form _NOW(_UTC), which returns the current
# UTC date and time.
_NOW = datetime.datetime.now
_UTC = datetime.timezone.utc
80
81
def _get_storage_emulator_override():
    """Look up the storage emulator setting in the process environment.

    :rtype: str or :data:`NoneType`
    :returns: The value of the environment variable named by
              ``STORAGE_EMULATOR_ENV_VAR``, or ``None`` when it is unset.
    """
    return os.getenv(STORAGE_EMULATOR_ENV_VAR)
84
85
def _get_default_storage_base_url():
    """Return the base URL (scheme plus host) used for storage requests.

    :rtype: str
    :returns: The value of the API endpoint override environment variable
              when it is set; otherwise the default scheme joined with the
              default storage host.
    """
    stock_base_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    return os.environ.get(_API_ENDPOINT_OVERRIDE_ENV_VAR, stock_base_url)
90
91
def _get_api_endpoint_override():
    """This is an experimental configuration variable. Use api_endpoint instead.

    :rtype: str or :data:`NoneType`
    :returns: The effective base URL when it differs from the built-in
              default, otherwise ``None``. An override that is set to
              exactly the default URL therefore also yields ``None``.
    """
    effective_base_url = _get_default_storage_base_url()
    stock_base_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    return None if effective_base_url == stock_base_url else effective_base_url
97
98
99def _virtual_hosted_style_base_url(url, bucket, trailing_slash=False):
100 """Returns the scheme and netloc sections of the url, with the bucket
101 prepended to the netloc.
102
103 Not intended for use with netlocs which include a username and password.
104 """
105 parsed_url = urlsplit(url)
106 new_netloc = f"{bucket}.{parsed_url.netloc}"
107 base_url = urlunsplit(
108 (parsed_url.scheme, new_netloc, "/" if trailing_slash else "", "", "")
109 )
110 return base_url
111
112
113def _use_client_cert():
114 return os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true"
115
116
def _get_environ_project():
    """Read the project ID from the process environment.

    :rtype: str or :data:`NoneType`
    :returns: The value of the variable named by ``environment_vars.PROJECT``
              when it is set; otherwise the value of the variable named by
              ``environment_vars.LEGACY_PROJECT``; ``None`` if neither is set.
    """
    legacy_project = os.getenv(environment_vars.LEGACY_PROJECT)
    return os.getenv(environment_vars.PROJECT, legacy_project)
122
123
124def _validate_name(name):
125 """Pre-flight ``Bucket`` name validation.
126
127 :type name: str or :data:`NoneType`
128 :param name: Proposed bucket name.
129
130 :rtype: str or :data:`NoneType`
131 :returns: ``name`` if valid.
132 """
133 if name is None:
134 return
135
136 # The first and last characters must be alphanumeric.
137 if not all([name[0].isalnum(), name[-1].isalnum()]):
138 raise ValueError("Bucket names must start and end with a number or letter.")
139 return name
140
141
class _PropertyMixin:
    """Abstract mixin for storage resources backed by a property mapping.

    The mixin keeps the resource properties in ``_properties`` and the names
    of locally modified fields in ``_changes``.

    Concrete subclasses are expected to provide:
      - path
      - client
      - user_project

    :type name: str
    :param name: The name of the object. Bucket names must start and end with a
                 number or letter.
    """

    def __init__(self, name=None):
        self.name = name
        self._properties = {}
        self._changes = set()

    @property
    def path(self):
        """Object path; abstract, subclasses must override."""
        raise NotImplementedError

    @property
    def client(self):
        """Client bound to the object; abstract, subclasses must override."""
        raise NotImplementedError

    @property
    def user_project(self):
        """Project billed for requests; abstract, subclasses must override."""
        raise NotImplementedError

    def _require_client(self, client):
        """Pick the client to use for a request.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :rtype: :class:`google.cloud.storage.client.Client`
        :returns: ``client`` when one was passed, else the bound client.
        """
        return self.client if client is None else client

    def _encryption_headers(self):
        """Return the encryption headers to send when fetching the object.

        .. note::
           The mixin version returns an empty mapping. It exists because
           :meth:`reload` calls it; per the original note it is really only
           relevant for :class:`~google.cloud.storage.blob.Blob`.

        :rtype: dict
        :returns: a mapping of encryption-related headers.
        """
        return {}

    @property
    def _query_params(self):
        """Default query parameters.

        A new dict is built on every access, so callers may add to it.
        It holds ``userProject`` when :attr:`user_project` is not ``None``
        and is empty otherwise.
        """
        billing_project = self.user_project
        if billing_project is None:
            return {}
        return {"userProject": billing_project}

    def reload(
        self,
        client=None,
        projection="noAcl",
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        soft_deleted=None,
    ):
        """Reload properties from Cloud Storage.

        Replaces ``_properties`` with the response and clears the set of
        pending local changes.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type projection: str
        :param projection: (Optional) If used, must be 'full' or 'noAcl'.
                           Defaults to ``'noAcl'``. Specifies the set of
                           properties to return.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match: (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match: (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type soft_deleted: bool
        :param soft_deleted:
            (Optional) If True, looks for a soft-deleted object. Will only return
            the object metadata if the object exists and is in a soft-deleted state.
            :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
            See: https://cloud.google.com/storage/docs/soft-delete
        """
        client = self._require_client(client)
        params = self._query_params
        # Only the projection is requested here; per the original note,
        # 'acl' and related fields are handled via custom endpoints.
        params["projection"] = projection
        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)
        if soft_deleted is not None:
            params["softDeleted"] = soft_deleted
            # A soft-delete reload needs a generation, even for targets whose
            # default query params do not carry one (buckets). The mixin does
            # not define ``generation``; the subclass has to supply it.
            params["generation"] = self.generation
        request_headers = self._encryption_headers()
        _add_etag_match_headers(
            request_headers,
            if_etag_match=if_etag_match,
            if_etag_not_match=if_etag_not_match,
        )
        self._set_properties(
            client._get_resource(
                self.path,
                query_params=params,
                headers=request_headers,
                timeout=timeout,
                retry=retry,
                _target_object=self,
            )
        )

    def _patch_property(self, name, value):
        """Record a local change to a single property.

        Only the given field is written; the other fields are left alone.

        Nothing is sent to or fetched from the server here. The change is
        local only and is synced by a later :meth:`patch`.

        :type name: str
        :param name: The field name to update.

        :type value: object
        :param value: The value being updated.
        """
        self._changes.add(name)
        self._properties[name] = value

    def _set_properties(self, value):
        """Replace the property mapping of the current object.

        :type value: dict or :class:`google.cloud.storage.batch._FutureDict`
        :param value: The properties to be set.
        """
        self._properties = value
        # A wholesale replacement makes any pending local changes obsolete.
        self._changes = set()

    def patch(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        override_unlocked_retention=False,
    ):
        """Sends all changed properties in a PATCH request.

        Only the fields recorded in ``_changes`` are sent. Afterwards
        ``_properties`` is replaced with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)
        params = self._query_params
        # 'full' projection is requested because, per the original note,
        # 'PATCH' is documented not to work properly with 'noAcl'.
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention
        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)

        # Collect just the locally modified fields for the request body.
        changed_fields = {}
        for field in self._changes:
            changed_fields[field] = self._properties[field]

        self._set_properties(
            client._patch_resource(
                self.path,
                changed_fields,
                query_params=params,
                _target_object=self,
                timeout=timeout,
                retry=retry,
            )
        )

    def update(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
        override_unlocked_retention=False,
    ):
        """Sends all properties in a PUT request.

        The whole ``_properties`` mapping is sent, changed or not. Afterwards
        ``_properties`` is replaced with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        params = self._query_params
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention
        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)

        self._set_properties(
            client._put_resource(
                self.path,
                self._properties,
                query_params=params,
                timeout=timeout,
                retry=retry,
                _target_object=self,
            )
        )
493
494
495def _scalar_property(fieldname):
496 """Create a property descriptor around the :class:`_PropertyMixin` helpers."""
497
498 def _getter(self):
499 """Scalar property getter."""
500 return self._properties.get(fieldname)
501
502 def _setter(self, value):
503 """Scalar property setter."""
504 self._patch_property(fieldname, value)
505
506 return property(_getter, _setter)
507
508
509def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192):
510 """Read blocks from a buffer and update a hash with them.
511
512 :type buffer_object: bytes buffer
513 :param buffer_object: Buffer containing bytes used to update a hash object.
514
515 :type hash_obj: object that implements update
516 :param hash_obj: A hash object (MD5 or CRC32-C).
517
518 :type digest_block_size: int
519 :param digest_block_size: The block size to write to the hash.
520 Defaults to 8192.
521 """
522 block = buffer_object.read(digest_block_size)
523
524 while len(block) > 0:
525 hash_obj.update(block)
526 # Update the block for the next iteration.
527 block = buffer_object.read(digest_block_size)
528
529
def _base64_md5hash(buffer_object):
    """Compute the base64-encoded MD5 digest of a buffer's contents.

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to compute an MD5
                          hash (as base64).

    :rtype: bytes
    :returns: The base64 encoding of the MD5 digest, as returned by
              :func:`base64.b64encode`.
    """
    # ``usedforsecurity`` is only passed on Python 3.9 and later.
    md5_kwargs = {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}
    hasher = md5(**md5_kwargs)
    _write_buffer_to_hash(buffer_object, hasher)
    return base64.b64encode(hasher.digest())
547
548
def _add_etag_match_headers(headers, **match_parameters):
    """Add ETag precondition headers to the given headers dict.

    Each supported keyword that is passed with a non-``None`` value is
    written to ``headers`` under its HTTP header name. A single string is
    sent as is; any other iterable of strings is joined with ``", "``.

    :type headers: dict
    :param headers: Headers dict, updated in place.

    :type match_parameters: dict
    :param match_parameters: if*etag*match parameters to add.
    """
    for keyword, header_name in _ETAG_MATCH_PARAMETERS:
        etags = match_parameters.get(keyword)
        if etags is None:
            continue

        # Wrap a lone string so it is not joined character by character.
        etag_list = [etags] if isinstance(etags, str) else etags
        headers[header_name] = ", ".join(etag_list)
565
566
def _add_generation_match_parameters(parameters, **match_parameters):
    """Add generation match parameters into the given parameters container.

    Each supported keyword that is passed with a non-``None`` value is added
    under its camelCase name: appended as a ``(name, value)`` tuple when
    ``parameters`` is a list, or stored as a key when it is a dict.

    :type parameters: list or dict
    :param parameters: Parameters list or dict, updated in place.

    :type match_parameters: dict
    :param match_parameters: if*generation*match parameters to add.

    :raises: :exc:`ValueError` if ``parameters`` is not a ``list()``
             or a ``dict()``. The check only runs when at least one match
             parameter is not ``None``.
    """
    supplied = []
    for keyword, request_name in _GENERATION_MATCH_PARAMETERS:
        precondition = match_parameters.get(keyword)
        if precondition is not None:
            supplied.append((request_name, precondition))

    if not supplied:
        return

    if isinstance(parameters, list):
        for pair in supplied:
            parameters.append(pair)
    elif isinstance(parameters, dict):
        for request_name, precondition in supplied:
            parameters[request_name] = precondition
    else:
        raise ValueError("`parameters` argument should be a dict() or a list().")
593
594
595def _raise_if_more_than_one_set(**kwargs):
596 """Raise ``ValueError`` exception if more than one parameter was set.
597
598 :type error: :exc:`ValueError`
599 :param error: Description of which fields were set
600
601 :raises: :class:`~ValueError` containing the fields that were set
602 """
603 if sum(arg is not None for arg in kwargs.values()) > 1:
604 escaped_keys = [f"'{name}'" for name in kwargs.keys()]
605
606 keys_but_last = ", ".join(escaped_keys[:-1])
607 last_key = escaped_keys[-1]
608
609 msg = f"Pass at most one of {keys_but_last} and {last_key}"
610
611 raise ValueError(msg)
612
613
614def _bucket_bound_hostname_url(host, scheme=None):
615 """Helper to build bucket bound hostname URL.
616
617 :type host: str
618 :param host: Host name.
619
620 :type scheme: str
621 :param scheme: (Optional) Web scheme. If passed, use it
622 as a scheme in the result URL.
623
624 :rtype: str
625 :returns: A bucket bound hostname URL.
626 """
627 url_parts = urlsplit(host)
628 if url_parts.scheme and url_parts.netloc:
629 return host
630
631 return f"{scheme}://{host}"
632
633
634def _get_invocation_id():
635 return "gccl-invocation-id/" + str(uuid4())
636
637
def _get_default_headers(
    user_agent,
    content_type="application/json; charset=UTF-8",
    x_upload_content_type=None,
    command=None,
):
    """Assemble the default headers for a request.

    :type user_agent: str
    :param user_agent: The user-agent for requests.

    :type content_type: str
    :param content_type:
        (Optional) Value for the ``content-type`` header.

    :type x_upload_content_type: str
    :param x_upload_content_type:
        (Optional) Value for the ``x-upload-content-type`` header. When it
        is ``None`` or empty, ``content_type`` is used instead.

    :type command: str
    :param command:
        (Optional) Information about which interface for the operation was
        used, to be included in the X-Goog-API-Client header. Please leave
        as None unless otherwise directed.

    :rtype: dict
    :returns: The headers to be used for the request.
    """
    # The client header carries the user agent, a fresh invocation ID and,
    # when given, the command marker, separated by single spaces.
    command_suffix = f" gccl-gcs-cmd/{command}" if command else ""
    api_client = f"{user_agent} {_get_invocation_id()}" + command_suffix

    upload_content_type = x_upload_content_type or content_type

    return {
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "User-Agent": user_agent,
        "X-Goog-API-Client": api_client,
        "content-type": content_type,
        "x-upload-content-type": upload_content_type,
    }