1# Copyright 2014 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Helper functions for Cloud Storage utility classes.
16
17These are *not* part of the API.
18"""
19
20import base64
21import datetime
22from hashlib import md5
23import os
24from urllib.parse import urlsplit
25from urllib.parse import urlunsplit
26from uuid import uuid4
27
28from google.auth import environment_vars
29from google.cloud.storage.constants import _DEFAULT_TIMEOUT
30from google.cloud.storage.retry import DEFAULT_RETRY
31from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
32
33
# Name of the environment variable consulted by
# ``_get_storage_emulator_override`` to locate a Storage emulator.
STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST"  # Despite name, includes scheme.
"""Environment variable defining host for Storage emulator."""

# Name of the environment variable consulted by
# ``_get_default_storage_base_url`` to replace the default base URL.
_API_ENDPOINT_OVERRIDE_ENV_VAR = "API_ENDPOINT_OVERRIDE"  # Includes scheme.
"""This is an experimental configuration variable. Use api_endpoint instead."""

# Name of the environment variable used to replace the default API version.
_API_VERSION_OVERRIDE_ENV_VAR = "API_VERSION_OVERRIDE"
"""This is an experimental configuration variable used for internal testing."""

# Domain substituted into the host template when building the default host.
_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com"

# Host name pattern; ``{universe_domain}`` is filled in with ``str.format``.
_STORAGE_HOST_TEMPLATE = "storage.{universe_domain}"

# The default host with the default universe domain applied
# (``storage.googleapis.com``). It is never affected by environment overrides.
_TRUE_DEFAULT_STORAGE_HOST = _STORAGE_HOST_TEMPLATE.format(
    universe_domain=_DEFAULT_UNIVERSE_DOMAIN
)

# Scheme prefix (including the ``://`` separator) prepended to the host.
_DEFAULT_SCHEME = "https://"

# Read from the environment once, at import time; falls back to "v1" when the
# override variable is not set.
_API_VERSION = os.getenv(_API_VERSION_OVERRIDE_ENV_VAR, "v1")
"""API version of the default storage host"""

# etag match parameters in snake case and equivalent header
# Each entry is (keyword argument name, HTTP header name); iterated by
# ``_add_etag_match_headers``.
_ETAG_MATCH_PARAMETERS = (
    ("if_etag_match", "If-Match"),
    ("if_etag_not_match", "If-None-Match"),
)

# generation match parameters in camel and snake cases
# Each entry is (keyword argument name, query parameter name); iterated by
# ``_add_generation_match_parameters``.
_GENERATION_MATCH_PARAMETERS = (
    ("if_generation_match", "ifGenerationMatch"),
    ("if_generation_not_match", "ifGenerationNotMatch"),
    ("if_metageneration_match", "ifMetagenerationMatch"),
    ("if_metageneration_not_match", "ifMetagenerationNotMatch"),
    ("if_source_generation_match", "ifSourceGenerationMatch"),
    ("if_source_generation_not_match", "ifSourceGenerationNotMatch"),
    ("if_source_metageneration_match", "ifSourceMetagenerationMatch"),
    ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"),
)

# _NOW() returns the current local date and time.
# It is preferred to use timezone-aware datetimes _NOW(_UTC),
# which returns the current UTC date and time.
# Both names are plain aliases of the standard-library objects.
_NOW = datetime.datetime.now
_UTC = datetime.timezone.utc
79
80
def _get_storage_emulator_override():
    """Return the Storage emulator URL configured in the environment.

    :rtype: str or :data:`NoneType`
    :returns: The value of the ``STORAGE_EMULATOR_ENV_VAR`` environment
              variable, or ``None`` when it is not set.
    """
    return os.getenv(STORAGE_EMULATOR_ENV_VAR)
83
84
def _get_default_storage_base_url():
    """Return the base URL used for the Storage API.

    :rtype: str
    :returns: The value of the ``_API_ENDPOINT_OVERRIDE_ENV_VAR`` environment
              variable when it is set; otherwise the default scheme followed
              by the default storage host.
    """
    builtin_base_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    return os.getenv(_API_ENDPOINT_OVERRIDE_ENV_VAR, builtin_base_url)
89
90
def _get_api_endpoint_override():
    """This is an experimental configuration variable. Use api_endpoint instead.

    :rtype: str or :data:`NoneType`
    :returns: The current base URL when it differs from the built-in default,
              otherwise ``None``.
    """
    builtin_base_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    if _get_default_storage_base_url() == builtin_base_url:
        # Nothing is overriding the default endpoint.
        return None
    return _get_default_storage_base_url()
96
97
98def _virtual_hosted_style_base_url(url, bucket, trailing_slash=False):
99 """Returns the scheme and netloc sections of the url, with the bucket
100 prepended to the netloc.
101
102 Not intended for use with netlocs which include a username and password.
103 """
104 parsed_url = urlsplit(url)
105 new_netloc = f"{bucket}.{parsed_url.netloc}"
106 base_url = urlunsplit(
107 (parsed_url.scheme, new_netloc, "/" if trailing_slash else "", "", "")
108 )
109 return base_url
110
111
112def _use_client_cert():
113 return os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true"
114
115
def _get_environ_project():
    """Return the project ID configured in the environment, if any.

    :rtype: str or :data:`NoneType`
    :returns: The value of the ``environment_vars.PROJECT`` variable when it
              is set; otherwise the value of the
              ``environment_vars.LEGACY_PROJECT`` variable, which may itself
              be ``None``.
    """
    legacy_project = os.getenv(environment_vars.LEGACY_PROJECT)
    return os.getenv(environment_vars.PROJECT, legacy_project)
121
122
123def _validate_name(name):
124 """Pre-flight ``Bucket`` name validation.
125
126 :type name: str or :data:`NoneType`
127 :param name: Proposed bucket name.
128
129 :rtype: str or :data:`NoneType`
130 :returns: ``name`` if valid.
131 """
132 if name is None:
133 return
134
135 # The first and last characters must be alphanumeric.
136 if not all([name[0].isalnum(), name[-1].isalnum()]):
137 raise ValueError("Bucket names must start and end with a number or letter.")
138 return name
139
140
class _PropertyMixin:
    """Abstract mixin for Cloud Storage classes that carry properties.

    Concrete subclasses are expected to provide:
      - path
      - client
      - user_project

    :type name: str
    :param name: The name of the object. Bucket names must start and end with a
                 number or letter.
    """

    def __init__(self, name=None):
        self.name = name
        # ``_properties`` holds the resource's property mapping; ``_changes``
        # records the names of properties modified locally through
        # ``_patch_property`` since the mapping was last replaced.
        self._properties = {}
        self._changes = set()

    @property
    def path(self):
        """Abstract getter for the object path."""
        raise NotImplementedError

    @property
    def client(self):
        """Abstract getter for the object client."""
        raise NotImplementedError

    @property
    def user_project(self):
        """Abstract getter for the object user_project."""
        raise NotImplementedError

    def _require_client(self, client):
        """Resolve the client to use for a request.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :rtype: :class:`google.cloud.storage.client.Client`
        :returns: The client passed in or the currently bound client.
        """
        return self.client if client is None else client

    def _encryption_headers(self):
        """Return any encryption headers needed to fetch the object.

        .. note::
           This base implementation returns an empty mapping. It exists here
           because :meth:`reload` calls it, but it is really only relevant
           for :class:`~google.cloud.storage.blob.Blob`.

        :rtype: dict
        :returns: a mapping of encryption-related headers.
        """
        return {}

    @property
    def _query_params(self):
        """Default query parameters.

        A new dict is built on every access; it contains ``userProject`` only
        when :attr:`user_project` is not ``None``.
        """
        billing_project = self.user_project
        if billing_project is None:
            return {}
        return {"userProject": billing_project}

    def reload(
        self,
        client=None,
        projection="noAcl",
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        soft_deleted=None,
    ):
        """Reload properties from Cloud Storage.

        The response replaces the locally stored properties and clears the
        record of local changes.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type projection: str
        :param projection: (Optional) If used, must be 'full' or 'noAcl'.
                           Defaults to ``'noAcl'``. Specifies the set of
                           properties to return.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match: (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match: (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type soft_deleted: bool
        :param soft_deleted:
            (Optional) If True, looks for a soft-deleted object. Will only return
            the object metadata if the object exists and is in a soft-deleted state.
            :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
            See: https://cloud.google.com/storage/docs/soft-delete
        """
        client = self._require_client(client)

        params = self._query_params
        # Pass only '?projection=noAcl' here because 'acl' and related
        # are handled via custom endpoints.
        params["projection"] = projection

        generation_preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **generation_preconditions)

        if soft_deleted is not None:
            # Soft delete reload requires a generation, even for targets
            # that don't include them in default query params (buckets).
            params.update(softDeleted=soft_deleted, generation=self.generation)

        request_headers = self._encryption_headers()
        _add_etag_match_headers(
            request_headers,
            if_etag_match=if_etag_match,
            if_etag_not_match=if_etag_not_match,
        )

        self._set_properties(
            client._get_resource(
                self.path,
                query_params=params,
                headers=request_headers,
                timeout=timeout,
                retry=retry,
                _target_object=self,
            )
        )

    def _patch_property(self, name, value):
        """Update a single field of this object's properties.

        Only the named field is touched; it is also recorded as changed so
        that :meth:`patch` will send it.

        It **will not** reload the properties from the server. The behavior is
        local only and syncing occurs via :meth:`patch`.

        :type name: str
        :param name: The field name to update.

        :type value: object
        :param value: The value being updated.
        """
        self._changes.add(name)
        self._properties[name] = value

    def _set_properties(self, value):
        """Replace the properties of the current object.

        :type value: dict or :class:`google.cloud.storage.batch._FutureDict`
        :param value: The properties to be set.
        """
        self._properties = value
        # Replacing the values wholesale invalidates any pending changes.
        self._changes = set()

    def patch(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        override_unlocked_retention=False,
    ):
        """Sends all changed properties in a PATCH request.

        Only the fields recorded as changed are sent. Updates the
        ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        params = self._query_params
        # Pass '?projection=full' here because 'PATCH' documented not
        # to work properly w/ 'noAcl'.
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention

        generation_preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **generation_preconditions)

        # Collect just the locally modified fields for the request body.
        changed_properties = {}
        for field in self._changes:
            changed_properties[field] = self._properties[field]

        # Make the API call.
        self._set_properties(
            client._patch_resource(
                self.path,
                changed_properties,
                query_params=params,
                _target_object=self,
                timeout=timeout,
                retry=retry,
            )
        )

    def update(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
        override_unlocked_retention=False,
    ):
        """Sends all properties in a PUT request.

        The full local property mapping is sent, not only the changed fields.
        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        params = self._query_params
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention

        generation_preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **generation_preconditions)

        self._set_properties(
            client._put_resource(
                self.path,
                self._properties,
                query_params=params,
                timeout=timeout,
                retry=retry,
                _target_object=self,
            )
        )
492
493
494def _scalar_property(fieldname):
495 """Create a property descriptor around the :class:`_PropertyMixin` helpers."""
496
497 def _getter(self):
498 """Scalar property getter."""
499 return self._properties.get(fieldname)
500
501 def _setter(self, value):
502 """Scalar property setter."""
503 self._patch_property(fieldname, value)
504
505 return property(_getter, _setter)
506
507
508def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192):
509 """Read blocks from a buffer and update a hash with them.
510
511 :type buffer_object: bytes buffer
512 :param buffer_object: Buffer containing bytes used to update a hash object.
513
514 :type hash_obj: object that implements update
515 :param hash_obj: A hash object (MD5 or CRC32-C).
516
517 :type digest_block_size: int
518 :param digest_block_size: The block size to write to the hash.
519 Defaults to 8192.
520 """
521 block = buffer_object.read(digest_block_size)
522
523 while len(block) > 0:
524 hash_obj.update(block)
525 # Update the block for the next iteration.
526 block = buffer_object.read(digest_block_size)
527
528
def _base64_md5hash(buffer_object):
    """Get MD5 hash of bytes (as base64).

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to compute an MD5
                          hash (as base64).

    :rtype: bytes
    :returns: The base64 encoding of the MD5 digest, as produced by
              :func:`base64.b64encode`.
    """
    digest = md5()
    _write_buffer_to_hash(buffer_object, digest)
    return base64.b64encode(digest.digest())
543
544
def _add_etag_match_headers(headers, **match_parameters):
    """Add etag match headers to the given headers dict.

    ``headers`` is modified in place. Parameters that are missing or ``None``
    are skipped.

    :type headers: dict
    :param headers: Headers dict.

    :type match_parameters: dict
    :param match_parameters: if*etag*match parameters to add. Each value is
                             either a single etag string or an iterable of
                             etag strings.
    """
    for param_name, header_name in _ETAG_MATCH_PARAMETERS:
        etags = match_parameters.get(param_name)
        if etags is None:
            continue

        # A lone string is treated as a one-element collection so that it is
        # not joined character by character.
        etag_values = [etags] if isinstance(etags, str) else etags
        headers[header_name] = ", ".join(etag_values)
561
562
def _add_generation_match_parameters(parameters, **match_parameters):
    """Add generation match parameters into the given parameters list.

    ``parameters`` is modified in place: a list receives ``(name, value)``
    tuples, a dict receives ``name: value`` entries. Match parameters that
    are missing or ``None`` are skipped.

    :type parameters: list or dict
    :param parameters: Parameters list or dict.

    :type match_parameters: dict
    :param match_parameters: if*generation*match parameters to add.

    :raises: :exc:`ValueError` if ``parameters`` is not a ``list()``
             or a ``dict()``. The check only happens once a match parameter
             with a non-``None`` value is encountered.
    """
    for param_name, query_name in _GENERATION_MATCH_PARAMETERS:
        value = match_parameters.get(param_name)
        if value is None:
            continue

        if isinstance(parameters, list):
            parameters.append((query_name, value))
            continue

        if isinstance(parameters, dict):
            parameters[query_name] = value
            continue

        raise ValueError("`parameters` argument should be a dict() or a list().")
589
590
591def _raise_if_more_than_one_set(**kwargs):
592 """Raise ``ValueError`` exception if more than one parameter was set.
593
594 :type error: :exc:`ValueError`
595 :param error: Description of which fields were set
596
597 :raises: :class:`~ValueError` containing the fields that were set
598 """
599 if sum(arg is not None for arg in kwargs.values()) > 1:
600 escaped_keys = [f"'{name}'" for name in kwargs.keys()]
601
602 keys_but_last = ", ".join(escaped_keys[:-1])
603 last_key = escaped_keys[-1]
604
605 msg = f"Pass at most one of {keys_but_last} and {last_key}"
606
607 raise ValueError(msg)
608
609
610def _bucket_bound_hostname_url(host, scheme=None):
611 """Helper to build bucket bound hostname URL.
612
613 :type host: str
614 :param host: Host name.
615
616 :type scheme: str
617 :param scheme: (Optional) Web scheme. If passed, use it
618 as a scheme in the result URL.
619
620 :rtype: str
621 :returns: A bucket bound hostname URL.
622 """
623 url_parts = urlsplit(host)
624 if url_parts.scheme and url_parts.netloc:
625 return host
626
627 return f"{scheme}://{host}"
628
629
630def _get_invocation_id():
631 return "gccl-invocation-id/" + str(uuid4())
632
633
def _get_default_headers(
    user_agent,
    content_type="application/json; charset=UTF-8",
    x_upload_content_type=None,
    command=None,
):
    """Get the headers for a request.

    :type user_agent: str
    :param user_agent: The user-agent for requests.

    :type content_type: str
    :param content_type: (Optional) Value for the ``content-type`` header.

    :type x_upload_content_type: str
    :param x_upload_content_type:
        (Optional) Value for the ``x-upload-content-type`` header. When
        omitted or empty, ``content_type`` is used instead.

    :type command: str
    :param command:
        (Optional) Information about which interface for the operation was
        used, to be included in the X-Goog-API-Client header. Please leave
        as None unless otherwise directed.

    :rtype: dict
    :returns: The headers to be used for the request.
    """
    # The API client header is a space-separated list: user agent, a fresh
    # invocation id, and (optionally) the command tag.
    api_client_parts = [f"{user_agent}", _get_invocation_id()]
    if command:
        api_client_parts.append(f"gccl-gcs-cmd/{command}")

    upload_content_type = x_upload_content_type or content_type

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "User-Agent": user_agent,
        "X-Goog-API-Client": " ".join(api_client_parts),
        "content-type": content_type,
        "x-upload-content-type": upload_content_type,
    }
    return headers