1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Helper functions for Cloud Storage utility classes.
16
17These are *not* part of the API.
18"""
19
20import base64
21import datetime
22from hashlib import md5
23import os
24import sys
25import secrets
26from urllib.parse import urlsplit
27from urllib.parse import urlunsplit
28from uuid import uuid4
29
30from google.auth import environment_vars
31from google.cloud.storage.constants import _DEFAULT_TIMEOUT
32from google.cloud.storage.retry import DEFAULT_RETRY
33from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
34
35
# Name of the environment variable consulted by
# ``_get_storage_emulator_override()``.
STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST"  # Despite name, includes scheme.
"""Environment variable defining host for Storage emulator."""

# Name of the environment variable consulted by
# ``_get_default_storage_base_url()``.
_API_ENDPOINT_OVERRIDE_ENV_VAR = "API_ENDPOINT_OVERRIDE"  # Includes scheme.
"""This is an experimental configuration variable. Use api_endpoint instead."""

# Name of the environment variable used below to compute ``_API_VERSION``.
_API_VERSION_OVERRIDE_ENV_VAR = "API_VERSION_OVERRIDE"
"""This is an experimental configuration variable used for internal testing."""

# Universe domain substituted into ``_STORAGE_HOST_TEMPLATE`` to build the
# default storage host.
_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com"

# ``str.format`` template; expects a ``universe_domain`` keyword.
_STORAGE_HOST_TEMPLATE = "storage.{universe_domain}"

# Storage host for the default universe domain ("storage.googleapis.com").
_TRUE_DEFAULT_STORAGE_HOST = _STORAGE_HOST_TEMPLATE.format(
    universe_domain=_DEFAULT_UNIVERSE_DOMAIN
)

# Scheme prefix (including the "://" separator) that is concatenated with
# ``_TRUE_DEFAULT_STORAGE_HOST`` to form the default base URL.
_DEFAULT_SCHEME = "https://"

# Read from the environment once, when this module is imported; falls back to
# "v1" when the override variable is not set.
_API_VERSION = os.getenv(_API_VERSION_OVERRIDE_ENV_VAR, "v1")
"""API version of the default storage host"""

# etag match parameters in snake case and equivalent header
# Each entry is a (keyword argument name, HTTP header name) pair.
_ETAG_MATCH_PARAMETERS = (
    ("if_etag_match", "If-Match"),
    ("if_etag_not_match", "If-None-Match"),
)

# generation match parameters in camel and snake cases
# Each entry is a (snake_case keyword argument name, camelCase query
# parameter name) pair.
_GENERATION_MATCH_PARAMETERS = (
    ("if_generation_match", "ifGenerationMatch"),
    ("if_generation_not_match", "ifGenerationNotMatch"),
    ("if_metageneration_match", "ifMetagenerationMatch"),
    ("if_metageneration_not_match", "ifMetagenerationNotMatch"),
    ("if_source_generation_match", "ifSourceGenerationMatch"),
    ("if_source_generation_not_match", "ifSourceGenerationNotMatch"),
    ("if_source_metageneration_match", "ifSourceMetagenerationMatch"),
    ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"),
)

# _NOW() returns the current local date and time as a naive datetime.
# Prefer the timezone-aware form _NOW(_UTC), which returns the current
# date and time in UTC.
_NOW = datetime.datetime.now
_UTC = datetime.timezone.utc
81
82
def _get_storage_emulator_override():
    """Return the storage emulator setting from the environment, if any.

    :rtype: str or :data:`NoneType`
    :returns: The value of the ``STORAGE_EMULATOR_ENV_VAR`` environment
              variable, or ``None`` when it is not set.
    """
    return os.getenv(STORAGE_EMULATOR_ENV_VAR)
85
86
def _get_default_storage_base_url():
    """Return the base URL to use for the storage API.

    :rtype: str
    :returns: The value of the ``_API_ENDPOINT_OVERRIDE_ENV_VAR`` environment
              variable when it is set; otherwise the default scheme joined
              with the default storage host.
    """
    builtin_base_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    return os.environ.get(_API_ENDPOINT_OVERRIDE_ENV_VAR, builtin_base_url)
91
92
def _get_api_endpoint_override():
    """Return the API endpoint override, if one is in effect.

    This is an experimental configuration variable. Use api_endpoint instead.

    :rtype: str or :data:`NoneType`
    :returns: The base URL reported by ``_get_default_storage_base_url()``
              when it differs from the built-in default storage URL;
              otherwise ``None``.
    """
    # Resolve the base URL once so that the comparison and the returned value
    # cannot disagree if the environment changes between two separate reads.
    base_url = _get_default_storage_base_url()
    if base_url != _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST:
        return base_url
    return None
98
99
def _virtual_hosted_style_base_url(url, bucket, trailing_slash=False):
    """Build a virtual-hosted-style base URL for ``bucket``.

    Only the scheme and netloc of ``url`` are kept; the bucket name is
    prepended to the netloc as an extra leading label. Any path, query or
    fragment on ``url`` is discarded.

    Not intended for use with netlocs which include a username and password.

    :type url: str
    :param url: URL supplying the scheme and netloc.

    :type bucket: str
    :param bucket: Bucket name to prepend to the netloc.

    :type trailing_slash: bool
    :param trailing_slash: (Optional) If true, the result ends with ``/``.

    :rtype: str
    :returns: ``<scheme>://<bucket>.<netloc>``, optionally followed by ``/``.
    """
    scheme, netloc = urlsplit(url)[:2]
    path = "/" if trailing_slash else ""
    return urlunsplit((scheme, f"{bucket}.{netloc}", path, "", ""))
112
113
def _use_client_cert():
    """Report whether client certificates were requested via the environment.

    :rtype: bool
    :returns: ``True`` only when ``GOOGLE_API_USE_CLIENT_CERTIFICATE`` is set
              to exactly ``"true"`` (the comparison is case-sensitive).
    """
    setting = os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE")
    return setting == "true"
116
117
def _get_environ_project():
    """Return the project ID configured through the environment, if any.

    :rtype: str or :data:`NoneType`
    :returns: The value of the ``environment_vars.PROJECT`` variable when set,
              else the value of ``environment_vars.LEGACY_PROJECT``, else
              ``None``.
    """
    legacy_project = os.getenv(environment_vars.LEGACY_PROJECT)
    return os.getenv(environment_vars.PROJECT, legacy_project)
123
124
def _validate_name(name):
    """Pre-flight ``Bucket`` name validation.

    :type name: str or :data:`NoneType`
    :param name: Proposed bucket name.

    :rtype: str or :data:`NoneType`
    :returns: ``name`` if valid; ``None`` if ``name`` is ``None``.

    :raises: :exc:`ValueError` if ``name`` is empty, or if its first or last
             character is not alphanumeric.
    """
    if name is None:
        return

    # The first and last characters must be alphanumeric. The emptiness check
    # comes first so that "" is rejected with the same ValueError instead of
    # failing with an IndexError on ``name[0]``.
    if not name or not (name[0].isalnum() and name[-1].isalnum()):
        raise ValueError("Bucket names must start and end with a number or letter.")
    return name
141
142
class _PropertyMixin(object):
    """Abstract mixin for storage classes backed by a dict of properties.

    The mixin keeps the resource's properties in ``_properties`` and records
    the names of locally modified fields in ``_changes``.

    Concrete subclasses are expected to provide:
      - path
      - client
      - user_project

    :type name: str
    :param name: The name of the object. Bucket names must start and end with a
                 number or letter.
    """

    def __init__(self, name=None):
        self.name = name
        # Current property values, and the names of fields changed locally
        # since the properties were last replaced wholesale.
        self._properties = {}
        self._changes = set()

    @property
    def path(self):
        """Abstract getter for the object path; subclasses must override."""
        raise NotImplementedError

    @property
    def client(self):
        """Abstract getter for the object client; subclasses must override."""
        raise NotImplementedError

    @property
    def user_project(self):
        """Abstract getter for the object user_project; subclasses must override."""
        raise NotImplementedError

    def _require_client(self, client):
        """Pick the client to use for a request.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :rtype: :class:`google.cloud.storage.client.Client`
        :returns: ``client`` when it is not ``None``, otherwise the client
                  bound to this object.
        """
        return self.client if client is None else client

    def _encryption_headers(self):
        """Return any encryption headers needed to fetch the object.

        .. note::
           The base implementation returns an empty mapping. It is defined
           here because :meth:`reload` calls it, but it is really only
           relevant for :class:`~google.cloud.storage.blob.Blob`.

        :rtype: dict
        :returns: a mapping of encryption-related headers.
        """
        return dict()

    @property
    def _query_params(self):
        """Default query parameters.

        :rtype: dict
        :returns: A new dict containing ``userProject`` when
                  :attr:`user_project` is not ``None``; otherwise an empty
                  dict.
        """
        billing_project = self.user_project
        if billing_project is None:
            return {}
        return {"userProject": billing_project}

    def reload(
        self,
        client=None,
        projection="noAcl",
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        soft_deleted=None,
    ):
        """Refresh this object's properties from Cloud Storage.

        The locally stored properties are replaced by the API response and
        the set of pending local changes is cleared.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type projection: str
        :param projection: (Optional) If used, must be 'full' or 'noAcl'.
                           Defaults to ``'noAcl'``. Specifies the set of
                           properties to return.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match: (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match: (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type soft_deleted: bool
        :param soft_deleted:
            (Optional) If True, looks for a soft-deleted object. Will only return
            the object metadata if the object exists and is in a soft-deleted state.
            :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
            See: https://cloud.google.com/storage/docs/soft-delete
        """
        client = self._require_client(client)
        query_params = self._query_params
        # The default projection is 'noAcl' because 'acl' and related fields
        # are handled via custom endpoints.
        query_params["projection"] = projection
        generation_conditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(query_params, **generation_conditions)
        if soft_deleted is not None:
            # A soft-delete reload needs a generation, even for targets
            # (buckets) whose default query params do not include one.
            query_params.update(softDeleted=soft_deleted, generation=self.generation)
        headers = self._encryption_headers()
        etag_conditions = {
            "if_etag_match": if_etag_match,
            "if_etag_not_match": if_etag_not_match,
        }
        _add_etag_match_headers(headers, **etag_conditions)
        self._set_properties(
            client._get_resource(
                self.path,
                query_params=query_params,
                headers=headers,
                timeout=timeout,
                retry=retry,
                _target_object=self,
            )
        )

    def _patch_property(self, name, value):
        """Set a single field locally and mark it as changed.

        Only the given field is touched; other fields are left as they are.

        Nothing is sent to or reloaded from the server: the change is local
        only and is synced via :meth:`patch`.

        :type name: str
        :param name: The field name to update.

        :type value: object
        :param value: The value being updated.
        """
        self._changes.add(name)
        self._properties[name] = value

    def _set_properties(self, value):
        """Replace all properties of the current object.

        :type value: dict or :class:`google.cloud.storage.batch._FutureDict`
        :param value: The properties to be set.
        """
        self._properties = value
        # Replacing every value also discards the record of pending changes.
        self._changes = set()

    def patch(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        override_unlocked_retention=False,
    ):
        """Send only the locally changed properties in a PATCH request.

        Replaces ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)
        query_params = self._query_params
        # Request '?projection=full' because 'PATCH' is documented not to
        # work properly with 'noAcl'.
        query_params["projection"] = "full"
        if override_unlocked_retention:
            query_params["overrideUnlockedRetention"] = override_unlocked_retention
        generation_conditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(query_params, **generation_conditions)

        # The request body carries only the fields recorded as changed.
        changed_properties = {}
        for field in self._changes:
            changed_properties[field] = self._properties[field]

        self._set_properties(
            client._patch_resource(
                self.path,
                changed_properties,
                query_params=query_params,
                _target_object=self,
                timeout=timeout,
                retry=retry,
            )
        )

    def update(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
        override_unlocked_retention=False,
    ):
        """Send the complete set of properties in a PUT request.

        Replaces ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        query_params = self._query_params
        query_params["projection"] = "full"
        if override_unlocked_retention:
            query_params["overrideUnlockedRetention"] = override_unlocked_retention
        generation_conditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(query_params, **generation_conditions)

        self._set_properties(
            client._put_resource(
                self.path,
                self._properties,
                query_params=query_params,
                timeout=timeout,
                retry=retry,
                _target_object=self,
            )
        )
494
495
def _scalar_property(fieldname):
    """Create a property descriptor around the :class:`_PropertyMixin` helpers.

    :type fieldname: str
    :param fieldname: Key of the field inside the owner's ``_properties``.

    :rtype: property
    :returns: A property that reads ``fieldname`` from ``_properties``
              (yielding ``None`` when the key is absent) and writes it
              through ``_patch_property``.
    """

    def _getter(self):
        """Scalar property getter."""
        stored = self._properties
        return stored.get(fieldname)

    def _setter(self, value):
        """Scalar property setter."""
        self._patch_property(fieldname, value)

    return property(fget=_getter, fset=_setter)
508
509
def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192):
    """Feed the contents of a buffer into a hash, one block at a time.

    Reading starts at the buffer's current position and stops at the first
    empty read.

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to update a hash object.

    :type hash_obj: object that implements update
    :param hash_obj: A hash object (MD5 or CRC32-C).

    :type digest_block_size: int
    :param digest_block_size: The block size to write to the hash.
                              Defaults to 8192.
    """
    while True:
        chunk = buffer_object.read(digest_block_size)
        if len(chunk) == 0:
            # An empty read marks the end of the buffer.
            break
        hash_obj.update(chunk)
529
530
def _base64_md5hash(buffer_object):
    """Compute the base64-encoded MD5 digest of a buffer's contents.

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to compute an MD5
                          hash (as base64).

    :rtype: bytes
    :returns: The MD5 digest, base64 encoded.
    """
    # ``usedforsecurity`` is only passed on Python 3.9 and newer; older
    # interpreters get the plain constructor call.
    md5_kwargs = {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}
    hasher = md5(**md5_kwargs)
    _write_buffer_to_hash(buffer_object, hasher)
    return base64.b64encode(hasher.digest())
548
549
def _add_etag_match_headers(headers, **match_parameters):
    """Add etag match headers to the given headers dict.

    Each supported keyword whose value is not ``None`` is written to
    ``headers`` under its HTTP header name. A single string is used as is;
    any other iterable of strings is joined with ``", "``.

    :type headers: dict
    :param headers: Headers dict; updated in place.

    :type match_parameters: dict
    :param match_parameters: if*etag*match parameters to add.
    """
    for keyword, header_name in _ETAG_MATCH_PARAMETERS:
        etags = match_parameters.get(keyword)
        if etags is None:
            continue

        # Wrap a lone string so it is not joined character by character.
        etag_list = [etags] if isinstance(etags, str) else etags
        headers[header_name] = ", ".join(etag_list)
566
567
def _add_generation_match_parameters(parameters, **match_parameters):
    """Add generation match parameters into the given parameters collection.

    Each supported keyword whose value is not ``None`` is added under its
    camelCase query parameter name: appended as a ``(name, value)`` tuple
    when ``parameters`` is a list, or stored as a key when it is a dict.

    :type parameters: list or dict
    :param parameters: Parameters list or dict; updated in place.

    :type match_parameters: dict
    :param match_parameters: if*generation*match parameters to add.

    :raises: :exc:`ValueError` if ``parameters`` is not a ``list()``
             or a ``dict()``. The type is only checked once a match
             parameter with a non-``None`` value is encountered.
    """
    for keyword, query_name in _GENERATION_MATCH_PARAMETERS:
        condition = match_parameters.get(keyword)
        if condition is None:
            continue

        if isinstance(parameters, list):
            parameters.append((query_name, condition))
            continue

        if not isinstance(parameters, dict):
            raise ValueError("`parameters` argument should be a dict() or a list().")

        parameters[query_name] = condition
594
595
def _raise_if_more_than_one_set(**kwargs):
    """Raise ``ValueError`` exception if more than one parameter was set.

    A parameter counts as set when its value is not ``None``.

    :type kwargs: dict
    :param kwargs: The mutually exclusive parameters, by name.

    :raises: :class:`~ValueError` whose message lists the names of all the
             parameters passed in (not only those that were set).
    """
    provided = [value for value in kwargs.values() if value is not None]
    if len(provided) <= 1:
        return

    # More than one value is set, so there are at least two names to list.
    *leading, final = (f"'{name}'" for name in kwargs)
    listed = ", ".join(leading)
    raise ValueError(f"Pass at most one of {listed} and {final}")
613
614
def _bucket_bound_hostname_url(host, scheme=None):
    """Helper to build bucket bound hostname URL.

    :type host: str
    :param host: Host name, either bare (``cdn.example.com``) or already
                 carrying a scheme (``https://cdn.example.com``).

    :type scheme: str
    :param scheme: (Optional) Web scheme. Used only when ``host`` does not
                   already include one. Defaults to ``"http"`` when omitted.

    :rtype: str
    :returns: A bucket bound hostname URL: ``host`` unchanged when it already
              has both a scheme and a netloc, otherwise ``<scheme>://<host>``.
    """
    url_parts = urlsplit(host)
    if url_parts.scheme and url_parts.netloc:
        return host

    # Without this fallback an omitted scheme would be formatted literally,
    # producing the malformed URL "None://<host>".
    # NOTE(review): "http" is chosen to match the documented default of the
    # public signed-URL ``scheme`` argument -- confirm against the callers.
    if not scheme:
        scheme = "http"

    return f"{scheme}://{host}"
633
634
def _get_invocation_id():
    """Return a fresh invocation identifier.

    :rtype: str
    :returns: The prefix ``gccl-invocation-id/`` followed by a newly
              generated random (version 4) UUID.
    """
    return f"gccl-invocation-id/{uuid4()}"
637
638
def _get_default_headers(
    user_agent,
    content_type="application/json; charset=UTF-8",
    x_upload_content_type=None,
    command=None,
):
    """Build the default headers for a request.

    :type user_agent: str
    :param user_agent: The user-agent for requests.

    :type content_type: str
    :param content_type: (Optional) Value of the ``content-type`` header.

    :type x_upload_content_type: str
    :param x_upload_content_type:
        (Optional) Value of the ``x-upload-content-type`` header. When not
        given (or empty), ``content_type`` is used instead.

    :type command: str
    :param command:
        (Optional) Information about which interface for the operation was
        used, to be included in the X-Goog-API-Client header. Please leave
        as None unless otherwise directed.

    :rtype: dict
    :returns: The headers to be used for the request.
    """
    # X-Goog-API-Client is a space-separated list: the user agent, a fresh
    # invocation id, and optionally the command marker.
    invocation_id = _get_invocation_id()
    api_client_parts = [f"{user_agent} {invocation_id}"]
    if command:
        api_client_parts.append(f"gccl-gcs-cmd/{command}")

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "User-Agent": user_agent,
        "X-Goog-API-Client": " ".join(api_client_parts),
        "content-type": content_type,
    }
    headers["x-upload-content-type"] = x_upload_content_type or content_type
    return headers
672
673
def generate_random_56_bit_integer():
    """Generate a secure random integer that fits in 56 bits.

    56 bits are used rather than 64 because a random 64-bit value can
    exceed the maximum positive signed 64-bit integer (2^63 - 1), which
    causes overflow issues.

    :rtype: int
    :returns: A secure random 56 bit integer.
    """
    byte_count = 56 // 8  # 7 random bytes carry exactly 56 bits.
    return int.from_bytes(secrets.token_bytes(byte_count), byteorder="big")