Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/storage/_helpers.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

171 statements  

1# Copyright 2014 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Helper functions for Cloud Storage utility classes. 

16 

17These are *not* part of the API. 

18""" 

19 

20import base64 

21import datetime 

22from hashlib import md5 

23import os 

24import sys 

25import secrets 

26from urllib.parse import urlsplit 

27from urllib.parse import urlunsplit 

28from uuid import uuid4 

29 

30from google.auth import environment_vars 

31from google.cloud.storage.constants import _DEFAULT_TIMEOUT 

32from google.cloud.storage.retry import DEFAULT_RETRY 

33from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED 

34 

35 

36STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST" # Despite name, includes scheme. 

37"""Environment variable defining host for Storage emulator.""" 

38 

39_API_ENDPOINT_OVERRIDE_ENV_VAR = "API_ENDPOINT_OVERRIDE" # Includes scheme. 

40"""This is an experimental configuration variable. Use api_endpoint instead.""" 

41 

42_API_VERSION_OVERRIDE_ENV_VAR = "API_VERSION_OVERRIDE" 

43"""This is an experimental configuration variable used for internal testing.""" 

44 

45_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com" 

46 

47_STORAGE_HOST_TEMPLATE = "storage.{universe_domain}" 

48 

49_TRUE_DEFAULT_STORAGE_HOST = _STORAGE_HOST_TEMPLATE.format( 

50 universe_domain=_DEFAULT_UNIVERSE_DOMAIN 

51) 

52 

53_DEFAULT_SCHEME = "https://" 

54 

55_API_VERSION = os.getenv(_API_VERSION_OVERRIDE_ENV_VAR, "v1") 

56"""API version of the default storage host""" 

57 

58# etag match parameters in snake case and equivalent header 

59_ETAG_MATCH_PARAMETERS = ( 

60 ("if_etag_match", "If-Match"), 

61 ("if_etag_not_match", "If-None-Match"), 

62) 

63 

64# generation match parameters in camel and snake cases 

65_GENERATION_MATCH_PARAMETERS = ( 

66 ("if_generation_match", "ifGenerationMatch"), 

67 ("if_generation_not_match", "ifGenerationNotMatch"), 

68 ("if_metageneration_match", "ifMetagenerationMatch"), 

69 ("if_metageneration_not_match", "ifMetagenerationNotMatch"), 

70 ("if_source_generation_match", "ifSourceGenerationMatch"), 

71 ("if_source_generation_not_match", "ifSourceGenerationNotMatch"), 

72 ("if_source_metageneration_match", "ifSourceMetagenerationMatch"), 

73 ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"), 

74) 

75 

76# _NOW() returns the current local date and time. 

77# It is preferred to use timezone-aware datetimes _NOW(_UTC), 

78# which returns the current UTC date and time. 

79_NOW = datetime.datetime.now 

80_UTC = datetime.timezone.utc 

81 

82 

def _get_storage_emulator_override():
    """Return the value of the Storage emulator environment variable.

    :rtype: str or :data:`NoneType`
    :returns: The value of ``STORAGE_EMULATOR_HOST``, or ``None`` if unset.
    """
    return os.getenv(STORAGE_EMULATOR_ENV_VAR)

85 

86 

def _get_default_storage_base_url():
    """Return the base URL of the storage API.

    The ``API_ENDPOINT_OVERRIDE`` environment variable wins when it is set;
    otherwise the default scheme and default storage host are joined.

    :rtype: str
    :returns: The base URL, including the scheme.
    """
    fallback_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    return os.getenv(_API_ENDPOINT_OVERRIDE_ENV_VAR, fallback_url)

91 

92 

def _get_api_endpoint_override():
    """This is an experimental configuration variable. Use api_endpoint instead.

    :rtype: str or :data:`NoneType`
    :returns: The base URL reported by :func:`_get_default_storage_base_url`
              when it differs from the built-in default URL, else ``None``.
    """
    # Resolve the base URL exactly once so that the value compared against the
    # default is the same value handed back to the caller, even if the
    # environment is modified concurrently.
    base_url = _get_default_storage_base_url()
    if base_url != _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST:
        return base_url
    return None

98 

99 

100def _virtual_hosted_style_base_url(url, bucket, trailing_slash=False): 

101 """Returns the scheme and netloc sections of the url, with the bucket 

102 prepended to the netloc. 

103 

104 Not intended for use with netlocs which include a username and password. 

105 """ 

106 parsed_url = urlsplit(url) 

107 new_netloc = f"{bucket}.{parsed_url.netloc}" 

108 base_url = urlunsplit( 

109 (parsed_url.scheme, new_netloc, "/" if trailing_slash else "", "", "") 

110 ) 

111 return base_url 

112 

113 

def _get_environ_project():
    """Look up the project ID from the environment.

    The variable named by ``environment_vars.PROJECT`` takes precedence; the
    one named by ``environment_vars.LEGACY_PROJECT`` is the fallback.

    :rtype: str or :data:`NoneType`
    :returns: The project ID, or ``None`` if neither variable is set.
    """
    legacy_project = os.getenv(environment_vars.LEGACY_PROJECT)
    return os.getenv(environment_vars.PROJECT, legacy_project)

119 

120 

121def _validate_name(name): 

122 """Pre-flight ``Bucket`` name validation. 

123 

124 :type name: str or :data:`NoneType` 

125 :param name: Proposed bucket name. 

126 

127 :rtype: str or :data:`NoneType` 

128 :returns: ``name`` if valid. 

129 """ 

130 if name is None: 

131 return 

132 

133 # The first and last characters must be alphanumeric. 

134 if not all([name[0].isalnum(), name[-1].isalnum()]): 

135 raise ValueError("Bucket names must start and end with a number or letter.") 

136 return name 

137 

138 

class _PropertyMixin:
    """Abstract mixin for Cloud Storage classes that carry a property mapping.

    The mixin keeps the last known server-side properties in ``_properties``
    and the names of locally modified fields in ``_changes``.

    Non-abstract subclasses should implement:
      - path
      - client
      - user_project

    :type name: str
    :param name: The name of the object. Bucket names must start and end with a
                 number or letter.
    """

    def __init__(self, name=None):
        self.name = name
        self._properties = {}
        self._changes = set()

    @property
    def path(self):
        """Abstract getter for the object path."""
        raise NotImplementedError

    @property
    def client(self):
        """Abstract getter for the object client."""
        raise NotImplementedError

    @property
    def user_project(self):
        """Abstract getter for the object user_project."""
        raise NotImplementedError

    def _require_client(self, client):
        """Pick the client to use for a request.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use.  If not passed, falls back to the
                       ``client`` stored on the current object.

        :rtype: :class:`google.cloud.storage.client.Client`
        :returns: The client passed in or the currently bound client.
        """
        return self.client if client is None else client

    def _encryption_headers(self):
        """Return any encryption headers needed to fetch the object.

        .. note::
           Lives on the mixin because :meth:`reload` calls it, but it is
           really only relevant for :class:`~google.cloud.storage.blob.Blob`.

        :rtype: dict
        :returns: a mapping of encryption-related headers (empty here).
        """
        return {}

    @property
    def _query_params(self):
        """Default query parameters.

        A new dict is built on every access; it contains ``userProject`` only
        when :attr:`user_project` is not ``None``.
        """
        billing_project = self.user_project
        if billing_project is None:
            return {}
        return {"userProject": billing_project}

    def reload(
        self,
        client=None,
        projection="noAcl",
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        soft_deleted=None,
    ):
        """Reload properties from Cloud Storage.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type projection: str
        :param projection: (Optional) If used, must be 'full' or 'noAcl'.
                           Defaults to ``'noAcl'``. Specifies the set of
                           properties to return.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match: (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match: (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type soft_deleted: bool
        :param soft_deleted:
            (Optional) If True, looks for a soft-deleted object. Will only return
            the object metadata if the object exists and is in a soft-deleted state.
            :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
            See: https://cloud.google.com/storage/docs/soft-delete
        """
        client = self._require_client(client)

        # Only the requested projection is sent (``noAcl`` by default): 'acl'
        # and related fields are handled via custom endpoints.
        params = self._query_params
        params["projection"] = projection

        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)

        if soft_deleted is not None:
            params["softDeleted"] = soft_deleted
            # A soft-delete reload needs a generation, even for targets whose
            # default query params do not include one (buckets). The mixin
            # itself does not define ``generation``; the subclass must.
            params["generation"] = self.generation

        request_headers = self._encryption_headers()
        _add_etag_match_headers(
            request_headers,
            if_etag_match=if_etag_match,
            if_etag_not_match=if_etag_not_match,
        )

        response = client._get_resource(
            self.path,
            query_params=params,
            headers=request_headers,
            timeout=timeout,
            retry=retry,
            _target_object=self,
        )
        self._set_properties(response)

    def _patch_property(self, name, value):
        """Update field of this object's properties.

        Only the given field is touched; every other field is left alone.

        It **will not** reload the properties from the server. The change is
        local only and is synced to the server via :meth:`patch`.

        :type name: str
        :param name: The field name to update.

        :type value: object
        :param value: The value being updated.
        """
        self._changes.add(name)
        self._properties[name] = value

    def _set_properties(self, value):
        """Set the properties for the current object.

        :type value: dict or :class:`google.cloud.storage.batch._FutureDict`
        :param value: The properties to be set.
        """
        self._properties = value
        # Replacing the whole mapping invalidates any pending local changes.
        self._changes = set()

    def patch(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        override_unlocked_retention=False,
    ):
        """Sends all changed properties in a PATCH request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        # '?projection=full' is always sent: 'PATCH' is documented not to
        # work properly w/ 'noAcl'.
        params = self._query_params
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention

        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)

        # Only fields recorded as locally changed are sent.
        changed_fields = {field: self._properties[field] for field in self._changes}

        response = client._patch_resource(
            self.path,
            changed_fields,
            query_params=params,
            _target_object=self,
            timeout=timeout,
            retry=retry,
        )
        self._set_properties(response)

    def update(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
        override_unlocked_retention=False,
    ):
        """Sends all properties in a PUT request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        params = self._query_params
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention

        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)

        # Unlike :meth:`patch`, the complete property mapping is sent.
        response = client._put_resource(
            self.path,
            self._properties,
            query_params=params,
            timeout=timeout,
            retry=retry,
            _target_object=self,
        )
        self._set_properties(response)

490 

491 

492def _scalar_property(fieldname): 

493 """Create a property descriptor around the :class:`_PropertyMixin` helpers.""" 

494 

495 def _getter(self): 

496 """Scalar property getter.""" 

497 return self._properties.get(fieldname) 

498 

499 def _setter(self, value): 

500 """Scalar property setter.""" 

501 self._patch_property(fieldname, value) 

502 

503 return property(_getter, _setter) 

504 

505 

506def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192): 

507 """Read blocks from a buffer and update a hash with them. 

508 

509 :type buffer_object: bytes buffer 

510 :param buffer_object: Buffer containing bytes used to update a hash object. 

511 

512 :type hash_obj: object that implements update 

513 :param hash_obj: A hash object (MD5 or CRC32-C). 

514 

515 :type digest_block_size: int 

516 :param digest_block_size: The block size to write to the hash. 

517 Defaults to 8192. 

518 """ 

519 block = buffer_object.read(digest_block_size) 

520 

521 while len(block) > 0: 

522 hash_obj.update(block) 

523 # Update the block for the next iteration. 

524 block = buffer_object.read(digest_block_size) 

525 

526 

def _base64_md5hash(buffer_object):
    """Get MD5 hash of bytes (as base64).

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to compute an MD5
                          hash (as base64).

    :rtype: bytes
    :returns: The base64 encoding of the MD5 digest, as produced by
              :func:`base64.b64encode`.
    """
    # ``usedforsecurity`` is only passed on Python 3.9+, matching the version
    # gate used by this module.
    md5_kwargs = {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}
    hasher = md5(**md5_kwargs)
    _write_buffer_to_hash(buffer_object, hasher)
    return base64.b64encode(hasher.digest())

544 

545 

def _add_etag_match_headers(headers, **match_parameters):
    """Add etag match headers into the given headers dict.

    For every supported keyword whose value is not ``None`` the matching
    header (``If-Match`` / ``If-None-Match``) is set in place. A single
    string is used as is; any other iterable of strings is joined with
    ``", "``.

    :type headers: dict
    :param headers: Headers dict.

    :type match_parameters: dict
    :param match_parameters: if*etag*match parameters to add.
    """
    for keyword, header_name in _ETAG_MATCH_PARAMETERS:
        etags = match_parameters.get(keyword)
        if etags is None:
            continue

        # Wrap a lone string so it is not joined character by character.
        etag_list = [etags] if isinstance(etags, str) else etags
        headers[header_name] = ", ".join(etag_list)

562 

563 

def _add_generation_match_parameters(parameters, **match_parameters):
    """Add generation match parameters into the given parameters list.

    Each supported keyword with a non-``None`` value is stored under its
    camelCase query-parameter name: appended as a ``(name, value)`` tuple
    when ``parameters`` is a list, or assigned as a key when it is a dict.

    :type parameters: list or dict
    :param parameters: Parameters list or dict.

    :type match_parameters: dict
    :param match_parameters: if*generation*match parameters to add.

    :raises: :exc:`ValueError` if ``parameters`` is not a ``list()``
             or a ``dict()``. The check only runs when there is at least
             one value to add.
    """
    for keyword, query_name in _GENERATION_MATCH_PARAMETERS:
        value = match_parameters.get(keyword)
        if value is None:
            continue

        if isinstance(parameters, list):
            parameters.append((query_name, value))
        elif isinstance(parameters, dict):
            parameters[query_name] = value
        else:
            raise ValueError(
                "`parameters` argument should be a dict() or a list()."
            )

590 

591 

592def _raise_if_more_than_one_set(**kwargs): 

593 """Raise ``ValueError`` exception if more than one parameter was set. 

594 

595 :type error: :exc:`ValueError` 

596 :param error: Description of which fields were set 

597 

598 :raises: :class:`~ValueError` containing the fields that were set 

599 """ 

600 if sum(arg is not None for arg in kwargs.values()) > 1: 

601 escaped_keys = [f"'{name}'" for name in kwargs.keys()] 

602 

603 keys_but_last = ", ".join(escaped_keys[:-1]) 

604 last_key = escaped_keys[-1] 

605 

606 msg = f"Pass at most one of {keys_but_last} and {last_key}" 

607 

608 raise ValueError(msg) 

609 

610 

611def _bucket_bound_hostname_url(host, scheme=None): 

612 """Helper to build bucket bound hostname URL. 

613 

614 :type host: str 

615 :param host: Host name. 

616 

617 :type scheme: str 

618 :param scheme: (Optional) Web scheme. If passed, use it 

619 as a scheme in the result URL. 

620 

621 :rtype: str 

622 :returns: A bucket bound hostname URL. 

623 """ 

624 url_parts = urlsplit(host) 

625 if url_parts.scheme and url_parts.netloc: 

626 return host 

627 

628 return f"{scheme}://{host}" 

629 

630 

631def _get_invocation_id(): 

632 return "gccl-invocation-id/" + str(uuid4()) 

633 

634 

def _get_default_headers(
    user_agent,
    content_type="application/json; charset=UTF-8",
    x_upload_content_type=None,
    command=None,
):
    """Get the headers for a request.

    :type user_agent: str
    :param user_agent: The user-agent for requests.

    :type content_type: str
    :param content_type: (Optional) Value of the ``content-type`` header.

    :type x_upload_content_type: str
    :param x_upload_content_type:
        (Optional) Value of the ``x-upload-content-type`` header. Falls back
        to ``content_type`` when empty or ``None``.

    :type command: str
    :param command:
        (Optional) Information about which interface for the operation was
        used, to be included in the X-Goog-API-Client header. Please leave
        as None unless otherwise directed.

    :rtype: dict
    :returns: The headers to be used for the request.
    """
    # The command tag is only appended when a command was supplied.
    command_suffix = f" gccl-gcs-cmd/{command}" if command else ""
    api_client = f"{user_agent} {_get_invocation_id()}{command_suffix}"

    return {
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "User-Agent": user_agent,
        "X-Goog-API-Client": api_client,
        "content-type": content_type,
        "x-upload-content-type": x_upload_content_type or content_type,
    }

668 

669 

def generate_random_56_bit_integer():
    """Generates a secure 56 bit random integer.

    56 bits are used instead of 64 because a random 64 bit value can exceed
    the largest positive signed 64 bit integer (2^63 - 1), which causes
    overflow issues.

    :rtype: int
    :returns: A secure random 56 bit integer.
    """
    byte_count = 56 // 8  # 7 bytes carry exactly 56 bits
    return int.from_bytes(secrets.token_bytes(byte_count), byteorder="big")