1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Helper functions for Cloud Storage utility classes.
16
17These are *not* part of the API.
18"""
19
20import base64
21import datetime
22from hashlib import md5
23import os
24import sys
25import secrets
26from urllib.parse import urlsplit
27from urllib.parse import urlunsplit
28from uuid import uuid4
29
30from google.auth import environment_vars
31from google.cloud.storage.constants import _DEFAULT_TIMEOUT
32from google.cloud.storage.retry import DEFAULT_RETRY
33from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
34
35
STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST"  # Despite name, includes scheme.
"""Environment variable defining host for Storage emulator."""

_API_ENDPOINT_OVERRIDE_ENV_VAR = "API_ENDPOINT_OVERRIDE"  # Includes scheme.
"""This is an experimental configuration variable. Use api_endpoint instead."""

_API_VERSION_OVERRIDE_ENV_VAR = "API_VERSION_OVERRIDE"
"""This is an experimental configuration variable used for internal testing."""

# Universe domain substituted into the host template below to build the
# default storage host.
_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com"

# Template for the storage host; ``universe_domain`` is the only placeholder.
_STORAGE_HOST_TEMPLATE = "storage.{universe_domain}"

# The storage host for the default universe domain.
_TRUE_DEFAULT_STORAGE_HOST = _STORAGE_HOST_TEMPLATE.format(
    universe_domain=_DEFAULT_UNIVERSE_DOMAIN
)

# Scheme prefix prepended to the default host to form the default base URL.
_DEFAULT_SCHEME = "https://"

# Read once, at import time; later changes to the environment variable do not
# affect this value.
_API_VERSION = os.getenv(_API_VERSION_OVERRIDE_ENV_VAR, "v1")
"""API version of the default storage host"""

# etag match parameters in snake case and equivalent header
_ETAG_MATCH_PARAMETERS = (
    ("if_etag_match", "If-Match"),
    ("if_etag_not_match", "If-None-Match"),
)

# generation match parameters in camel and snake cases
# Each entry is (snake_case keyword argument, camelCase query parameter).
_GENERATION_MATCH_PARAMETERS = (
    ("if_generation_match", "ifGenerationMatch"),
    ("if_generation_not_match", "ifGenerationNotMatch"),
    ("if_metageneration_match", "ifMetagenerationMatch"),
    ("if_metageneration_not_match", "ifMetagenerationNotMatch"),
    ("if_source_generation_match", "ifSourceGenerationMatch"),
    ("if_source_generation_not_match", "ifSourceGenerationNotMatch"),
    ("if_source_metageneration_match", "ifSourceMetagenerationMatch"),
    ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"),
)

# _NOW() returns the current local date and time.
# It is preferred to use timezone-aware datetimes _NOW(_UTC),
# which returns the current UTC date and time.
_NOW = datetime.datetime.now
_UTC = datetime.timezone.utc
81
82
def _get_storage_emulator_override():
    """Return the storage emulator setting from the environment.

    :rtype: str or :data:`NoneType`
    :returns: The value of the ``STORAGE_EMULATOR_ENV_VAR`` environment
              variable, or ``None`` when it is not set.
    """
    return os.getenv(STORAGE_EMULATOR_ENV_VAR)
85
86
def _get_default_storage_base_url():
    """Return the base URL (scheme included) used for storage requests.

    :rtype: str
    :returns: The value of the API endpoint override environment variable
              when it is set, otherwise the default scheme followed by the
              default storage host.
    """
    fallback_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    return os.getenv(_API_ENDPOINT_OVERRIDE_ENV_VAR, fallback_url)
91
92
def _get_api_endpoint_override():
    """This is an experimental configuration variable. Use api_endpoint instead.

    :rtype: str or :data:`NoneType`
    :returns: The configured base URL when it differs from the built-in
              default, otherwise ``None``.
    """
    builtin_default = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    # Nothing counts as an override unless it differs from the default URL.
    if _get_default_storage_base_url() == builtin_default:
        return None
    return _get_default_storage_base_url()
98
99
100def _virtual_hosted_style_base_url(url, bucket, trailing_slash=False):
101 """Returns the scheme and netloc sections of the url, with the bucket
102 prepended to the netloc.
103
104 Not intended for use with netlocs which include a username and password.
105 """
106 parsed_url = urlsplit(url)
107 new_netloc = f"{bucket}.{parsed_url.netloc}"
108 base_url = urlunsplit(
109 (parsed_url.scheme, new_netloc, "/" if trailing_slash else "", "", "")
110 )
111 return base_url
112
113
def _get_environ_project():
    """Return the project ID configured through the environment.

    :rtype: str or :data:`NoneType`
    :returns: The value of the ``environment_vars.PROJECT`` variable when set,
              otherwise the value of ``environment_vars.LEGACY_PROJECT``
              (``None`` if neither is set).
    """
    legacy_project = os.getenv(environment_vars.LEGACY_PROJECT)
    return os.getenv(environment_vars.PROJECT, legacy_project)
119
120
121def _validate_name(name):
122 """Pre-flight ``Bucket`` name validation.
123
124 :type name: str or :data:`NoneType`
125 :param name: Proposed bucket name.
126
127 :rtype: str or :data:`NoneType`
128 :returns: ``name`` if valid.
129 """
130 if name is None:
131 return
132
133 # The first and last characters must be alphanumeric.
134 if not all([name[0].isalnum(), name[-1].isalnum()]):
135 raise ValueError("Bucket names must start and end with a number or letter.")
136 return name
137
138
class _PropertyMixin:
    """Abstract mixin for cloud storage classes with associated properties.

    Concrete subclasses are expected to provide:
      - path
      - client
      - user_project

    :type name: str
    :param name: The name of the object. Bucket names must start and end with a
                 number or letter.
    """

    def __init__(self, name=None):
        self.name = name
        # ``_properties`` holds the resource fields; ``_changes`` records the
        # names of fields modified locally since they were last replaced.
        self._properties, self._changes = {}, set()

    @property
    def path(self):
        """Abstract getter for the object path."""
        raise NotImplementedError

    @property
    def client(self):
        """Abstract getter for the object client."""
        raise NotImplementedError

    @property
    def user_project(self):
        """Abstract getter for the object user_project."""
        raise NotImplementedError

    def _require_client(self, client):
        """Resolve the client to use for a request.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :rtype: :class:`google.cloud.storage.client.Client`
        :returns: The client passed in or the currently bound client.
        """
        return self.client if client is None else client

    def _encryption_headers(self):
        """Return any encryption headers needed to fetch the object.

        .. note::
            Defined here because :meth:`reload` calls it, but this method is
            really only relevant for :class:`~google.cloud.storage.blob.Blob`.

        :rtype: dict
        :returns: a mapping of encryption-related headers (empty here).
        """
        return {}

    @property
    def _query_params(self):
        """Default query parameters.

        :rtype: dict
        :returns: A new dict containing ``userProject`` when
                  :attr:`user_project` is not ``None``, otherwise empty.
        """
        billing_project = self.user_project
        if billing_project is None:
            return {}
        return {"userProject": billing_project}

    def reload(
        self,
        client=None,
        projection="noAcl",
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        soft_deleted=None,
    ):
        """Reload properties from Cloud Storage.

        The response replaces the locally held properties and clears the set
        of pending changes.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type projection: str
        :param projection: (Optional) If used, must be 'full' or 'noAcl'.
                           Defaults to ``'noAcl'``. Specifies the set of
                           properties to return.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match: (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match: (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type soft_deleted: bool
        :param soft_deleted:
            (Optional) If True, looks for a soft-deleted object. Will only return
            the object metadata if the object exists and is in a soft-deleted state.
            :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
            See: https://cloud.google.com/storage/docs/soft-delete
        """
        client = self._require_client(client)
        params = self._query_params
        # Pass only '?projection=noAcl' here because 'acl' and related
        # are handled via custom endpoints.
        params["projection"] = projection
        preconditions = dict(
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )
        _add_generation_match_parameters(params, **preconditions)
        if soft_deleted is not None:
            params["softDeleted"] = soft_deleted
            # Soft delete reload requires a generation, even for targets
            # that don't include them in default query params (buckets).
            params["generation"] = self.generation

        request_headers = self._encryption_headers()
        _add_etag_match_headers(
            request_headers,
            if_etag_match=if_etag_match,
            if_etag_not_match=if_etag_not_match,
        )
        resource = client._get_resource(
            self.path,
            query_params=params,
            headers=request_headers,
            timeout=timeout,
            retry=retry,
            _target_object=self,
        )
        self._set_properties(resource)

    def _patch_property(self, name, value):
        """Update field of this object's properties.

        Only the given field is touched; it is also recorded as changed so
        that a later :meth:`patch` sends it.

        It **will not** reload the properties from the server. The behavior is
        local only and syncing occurs via :meth:`patch`.

        :type name: str
        :param name: The field name to update.

        :type value: object
        :param value: The value being updated.
        """
        self._changes.add(name)
        self._properties[name] = value

    def _set_properties(self, value):
        """Set the properties for the current object.

        :type value: dict or :class:`google.cloud.storage.batch._FutureDict`
        :param value: The properties to be set.
        """
        self._properties = value
        # Replacing the properties wholesale discards any pending changes.
        self._changes = set()

    def patch(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        override_unlocked_retention=False,
    ):
        """Sends all changed properties in a PATCH request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)
        params = self._query_params
        # Pass '?projection=full' here because 'PATCH' documented not
        # to work properly w/ 'noAcl'.
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention
        preconditions = dict(
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )
        _add_generation_match_parameters(params, **preconditions)

        # Only fields recorded as changed are sent in the request body.
        changed_fields = {field: self._properties[field] for field in self._changes}

        resource = client._patch_resource(
            self.path,
            changed_fields,
            query_params=params,
            _target_object=self,
            timeout=timeout,
            retry=retry,
        )
        self._set_properties(resource)

    def update(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
        override_unlocked_retention=False,
    ):
        """Sends all properties in a PUT request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        params = self._query_params
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention
        preconditions = dict(
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )
        _add_generation_match_parameters(params, **preconditions)

        # Unlike patch(), the full property mapping is sent as the body.
        resource = client._put_resource(
            self.path,
            self._properties,
            query_params=params,
            timeout=timeout,
            retry=retry,
            _target_object=self,
        )
        self._set_properties(resource)
490
491
492def _scalar_property(fieldname):
493 """Create a property descriptor around the :class:`_PropertyMixin` helpers."""
494
495 def _getter(self):
496 """Scalar property getter."""
497 return self._properties.get(fieldname)
498
499 def _setter(self, value):
500 """Scalar property setter."""
501 self._patch_property(fieldname, value)
502
503 return property(_getter, _setter)
504
505
506def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192):
507 """Read blocks from a buffer and update a hash with them.
508
509 :type buffer_object: bytes buffer
510 :param buffer_object: Buffer containing bytes used to update a hash object.
511
512 :type hash_obj: object that implements update
513 :param hash_obj: A hash object (MD5 or CRC32-C).
514
515 :type digest_block_size: int
516 :param digest_block_size: The block size to write to the hash.
517 Defaults to 8192.
518 """
519 block = buffer_object.read(digest_block_size)
520
521 while len(block) > 0:
522 hash_obj.update(block)
523 # Update the block for the next iteration.
524 block = buffer_object.read(digest_block_size)
525
526
def _base64_md5hash(buffer_object):
    """Get MD5 hash of bytes (as base64).

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to compute an MD5
                          hash (as base64).

    :rtype: bytes
    :returns: A base64 encoded digest of the MD5 hash, as produced by
              :func:`base64.b64encode`.
    """
    # ``usedforsecurity`` is only passed on Python 3.9 and later.
    md5_kwargs = {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}
    digest = md5(**md5_kwargs)
    _write_buffer_to_hash(buffer_object, digest)
    return base64.b64encode(digest.digest())
544
545
def _add_etag_match_headers(headers, **match_parameters):
    """Add etag match headers into the given headers dict.

    For every etag parameter that is present and not ``None``, the
    corresponding HTTP header is set in ``headers``. Several etags are joined
    with ``", "``.

    :type headers: dict
    :param headers: Headers dict; modified in place.

    :type match_parameters: dict
    :param match_parameters: if*etag*match parameters to add. Each value is a
                             single etag string or an iterable of etag strings.
    """
    for param_name, header_name in _ETAG_MATCH_PARAMETERS:
        etags = match_parameters.get(param_name)
        if etags is None:
            continue
        # A lone string is one etag, not an iterable of characters.
        etag_list = [etags] if isinstance(etags, str) else etags
        headers[header_name] = ", ".join(etag_list)
562
563
def _add_generation_match_parameters(parameters, **match_parameters):
    """Add generation match parameters into the given parameters list or dict.

    Each match parameter that is present and not ``None`` is added under its
    camelCase name: appended as a ``(name, value)`` tuple to a list, or
    assigned as a key in a dict.

    :type parameters: list or dict
    :param parameters: Parameters list or dict; modified in place.

    :type match_parameters: dict
    :param match_parameters: if*generation*match parameters to add.

    :raises: :exc:`ValueError` if ``parameters`` is not a ``list()``
             or a ``dict()``. The type is only checked once a non-``None``
             match parameter is found.
    """
    for snake_name, camel_name in _GENERATION_MATCH_PARAMETERS:
        value = match_parameters.get(snake_name)
        if value is None:
            continue

        if isinstance(parameters, list):
            parameters.append((camel_name, value))
        elif isinstance(parameters, dict):
            parameters[camel_name] = value
        else:
            raise ValueError("`parameters` argument should be a dict() or a list().")
590
591
592def _raise_if_more_than_one_set(**kwargs):
593 """Raise ``ValueError`` exception if more than one parameter was set.
594
595 :type error: :exc:`ValueError`
596 :param error: Description of which fields were set
597
598 :raises: :class:`~ValueError` containing the fields that were set
599 """
600 if sum(arg is not None for arg in kwargs.values()) > 1:
601 escaped_keys = [f"'{name}'" for name in kwargs.keys()]
602
603 keys_but_last = ", ".join(escaped_keys[:-1])
604 last_key = escaped_keys[-1]
605
606 msg = f"Pass at most one of {keys_but_last} and {last_key}"
607
608 raise ValueError(msg)
609
610
611def _bucket_bound_hostname_url(host, scheme=None):
612 """Helper to build bucket bound hostname URL.
613
614 :type host: str
615 :param host: Host name.
616
617 :type scheme: str
618 :param scheme: (Optional) Web scheme. If passed, use it
619 as a scheme in the result URL.
620
621 :rtype: str
622 :returns: A bucket bound hostname URL.
623 """
624 url_parts = urlsplit(host)
625 if url_parts.scheme and url_parts.netloc:
626 return host
627
628 return f"{scheme}://{host}"
629
630
631def _get_invocation_id():
632 return "gccl-invocation-id/" + str(uuid4())
633
634
def _get_default_headers(
    user_agent,
    content_type="application/json; charset=UTF-8",
    x_upload_content_type=None,
    command=None,
):
    """Get the headers for a request.

    :type user_agent: str
    :param user_agent: The user-agent for requests.

    :type content_type: str
    :param content_type: (Optional) Value for the ``content-type`` header.

    :type x_upload_content_type: str
    :param x_upload_content_type:
        (Optional) Value for the ``x-upload-content-type`` header. When not
        given (or falsy), ``content_type`` is used instead.

    :type command: str
    :param command:
        (Optional) Information about which interface for the operation was
        used, to be included in the X-Goog-API-Client header. Please leave
        as None unless otherwise directed.

    :rtype: dict
    :returns: The headers to be used for the request.
    """
    # A new invocation ID is generated for every set of headers.
    command_suffix = f" gccl-gcs-cmd/{command}" if command else ""
    api_client_info = f"{user_agent} {_get_invocation_id()}{command_suffix}"

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "User-Agent": user_agent,
        "X-Goog-API-Client": api_client_info,
        "content-type": content_type,
        "x-upload-content-type": x_upload_content_type or content_type,
    }
    return headers
668
669
def generate_random_56_bit_integer():
    """Generates a secure 56 bit random integer.

    If 64 bit int is used, sometimes the random int generated is greater than
    max positive value of signed 64 bit int which is 2^63 -1 causing overflow
    issues.

    :rtype: int
    :returns: A secure random 56 bit integer.
    """
    # 56 bits / 8 bits per byte = 7 bytes, read as a big-endian integer.
    byte_count = 56 // 8
    return int.from_bytes(secrets.token_bytes(byte_count), byteorder="big")