Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/storage/_helpers.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

166 statements  

1# Copyright 2014 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Helper functions for Cloud Storage utility classes. 

16 

17These are *not* part of the API. 

18""" 

19 

20import base64 

21import datetime 

22from hashlib import md5 

23import os 

24from urllib.parse import urlsplit 

25from urllib.parse import urlunsplit 

26from uuid import uuid4 

27 

28from google.auth import environment_vars 

29from google.cloud.storage.constants import _DEFAULT_TIMEOUT 

30from google.cloud.storage.retry import DEFAULT_RETRY 

31from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED 

32 

33 

34STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST" # Despite name, includes scheme. 

35"""Environment variable defining host for Storage emulator.""" 

36 

37_API_ENDPOINT_OVERRIDE_ENV_VAR = "API_ENDPOINT_OVERRIDE" # Includes scheme. 

38"""This is an experimental configuration variable. Use api_endpoint instead.""" 

39 

40_API_VERSION_OVERRIDE_ENV_VAR = "API_VERSION_OVERRIDE" 

41"""This is an experimental configuration variable used for internal testing.""" 

42 

43_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com" 

44 

45_STORAGE_HOST_TEMPLATE = "storage.{universe_domain}" 

46 

47_TRUE_DEFAULT_STORAGE_HOST = _STORAGE_HOST_TEMPLATE.format( 

48 universe_domain=_DEFAULT_UNIVERSE_DOMAIN 

49) 

50 

51_DEFAULT_SCHEME = "https://" 

52 

53_API_VERSION = os.getenv(_API_VERSION_OVERRIDE_ENV_VAR, "v1") 

54"""API version of the default storage host""" 

55 

56# etag match parameters in snake case and equivalent header 

57_ETAG_MATCH_PARAMETERS = ( 

58 ("if_etag_match", "If-Match"), 

59 ("if_etag_not_match", "If-None-Match"), 

60) 

61 

62# generation match parameters in camel and snake cases 

63_GENERATION_MATCH_PARAMETERS = ( 

64 ("if_generation_match", "ifGenerationMatch"), 

65 ("if_generation_not_match", "ifGenerationNotMatch"), 

66 ("if_metageneration_match", "ifMetagenerationMatch"), 

67 ("if_metageneration_not_match", "ifMetagenerationNotMatch"), 

68 ("if_source_generation_match", "ifSourceGenerationMatch"), 

69 ("if_source_generation_not_match", "ifSourceGenerationNotMatch"), 

70 ("if_source_metageneration_match", "ifSourceMetagenerationMatch"), 

71 ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"), 

72) 

73 

74# _NOW() returns the current local date and time. 

75# It is preferred to use timezone-aware datetimes _NOW(_UTC), 

76# which returns the current UTC date and time. 

77_NOW = datetime.datetime.now 

78_UTC = datetime.timezone.utc 

79 

80 

def _get_storage_emulator_override():
    """Return the value of the storage emulator environment variable.

    :rtype: str or :data:`NoneType`
    :returns: The value stored under ``STORAGE_EMULATOR_ENV_VAR``, or ``None``
              when that variable is not set.
    """
    return os.getenv(STORAGE_EMULATOR_ENV_VAR)

83 

84 

def _get_default_storage_base_url():
    """Return the base URL (scheme included) used for storage requests.

    :rtype: str
    :returns: The value of the API endpoint override environment variable when
              it is set; otherwise the default scheme joined with the default
              storage host.
    """
    fallback_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    return os.environ.get(_API_ENDPOINT_OVERRIDE_ENV_VAR, fallback_url)

89 

90 

def _get_api_endpoint_override():
    """This is an experimental configuration variable. Use api_endpoint instead.

    :rtype: str or :data:`NoneType`
    :returns: The base URL reported by :func:`_get_default_storage_base_url`
              when it differs from the built-in default (default scheme plus
              default storage host); ``None`` when it equals that default.
    """
    builtin_default = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    # Resolve the base URL exactly once so the value that is compared is the
    # same value that is returned, even if the environment changes meanwhile.
    base_url = _get_default_storage_base_url()
    if base_url != builtin_default:
        return base_url
    return None

96 

97 

98def _virtual_hosted_style_base_url(url, bucket, trailing_slash=False): 

99 """Returns the scheme and netloc sections of the url, with the bucket 

100 prepended to the netloc. 

101 

102 Not intended for use with netlocs which include a username and password. 

103 """ 

104 parsed_url = urlsplit(url) 

105 new_netloc = f"{bucket}.{parsed_url.netloc}" 

106 base_url = urlunsplit( 

107 (parsed_url.scheme, new_netloc, "/" if trailing_slash else "", "", "") 

108 ) 

109 return base_url 

110 

111 

112def _use_client_cert(): 

113 return os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" 

114 

115 

def _get_environ_project():
    """Look up the project ID from the environment.

    :rtype: str or :data:`NoneType`
    :returns: The value of ``environment_vars.PROJECT`` when set; otherwise
              the value of ``environment_vars.LEGACY_PROJECT``; ``None`` when
              neither variable is set.
    """
    legacy_project = os.getenv(environment_vars.LEGACY_PROJECT)
    project = os.getenv(environment_vars.PROJECT)
    if project is None:
        return legacy_project
    return project

121 

122 

123def _validate_name(name): 

124 """Pre-flight ``Bucket`` name validation. 

125 

126 :type name: str or :data:`NoneType` 

127 :param name: Proposed bucket name. 

128 

129 :rtype: str or :data:`NoneType` 

130 :returns: ``name`` if valid. 

131 """ 

132 if name is None: 

133 return 

134 

135 # The first and last characters must be alphanumeric. 

136 if not all([name[0].isalnum(), name[-1].isalnum()]): 

137 raise ValueError("Bucket names must start and end with a number or letter.") 

138 return name 

139 

140 

class _PropertyMixin:
    """Mixin that keeps a local property cache and syncs it with the backend.

    Concrete subclasses are expected to provide:
      - path
      - client
      - user_project

    :type name: str
    :param name: The name of the object. Bucket names must start and end with a
                 number or letter.
    """

    def __init__(self, name=None):
        self.name = name
        # Resource fields as last set locally or received from the backend.
        self._properties = {}
        # Names of fields modified locally via ``_patch_property``.
        self._changes = set()

    @property
    def path(self):
        """Object path; must be overridden by subclasses."""
        raise NotImplementedError

    @property
    def client(self):
        """Bound client; must be overridden by subclasses."""
        raise NotImplementedError

    @property
    def user_project(self):
        """Project billed for requests; must be overridden by subclasses."""
        raise NotImplementedError

    def _require_client(self, client):
        """Pick the client to use for a request.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :rtype: :class:`google.cloud.storage.client.Client`
        :returns: ``client`` when it is not ``None``, else ``self.client``.
        """
        return self.client if client is None else client

    def _encryption_headers(self):
        """Return the encryption headers needed to fetch the object.

        .. note::
           Lives on the mixin because :meth:`reload` calls it, although it is
           really only relevant for :class:`~google.cloud.storage.blob.Blob`.

        :rtype: dict
        :returns: a mapping of encryption-related headers (empty here).
        """
        return {}

    @property
    def _query_params(self):
        """Default query parameters.

        A new dict is built on each access; it contains ``userProject`` only
        when :attr:`user_project` is not ``None``.
        """
        defaults = {}
        if self.user_project is not None:
            defaults["userProject"] = self.user_project
        return defaults

    def reload(
        self,
        client=None,
        projection="noAcl",
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        soft_deleted=None,
    ):
        """Fetch this resource and replace the local properties with the response.

        If :attr:`user_project` is set, bills the API request to that project.
        Any locally recorded changes are discarded once the response is stored.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type projection: str
        :param projection: (Optional) If used, must be 'full' or 'noAcl'.
                           Defaults to ``'noAcl'``. Specifies the set of
                           properties to return.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match: (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match: (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type soft_deleted: bool
        :param soft_deleted:
            (Optional) If True, looks for a soft-deleted object. Will only return
            the object metadata if the object exists and is in a soft-deleted state.
            :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
            Whenever this argument is not ``None``, ``self.generation`` is sent
            as the ``generation`` query parameter.
            See: https://cloud.google.com/storage/docs/soft-delete
        """
        client = self._require_client(client)

        params = self._query_params
        # Only '?projection=noAcl' is passed by default because 'acl' and
        # related fields are handled via custom endpoints.
        params["projection"] = projection
        generation_conditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **generation_conditions)
        if soft_deleted is not None:
            params["softDeleted"] = soft_deleted
            # A soft-delete reload needs a generation, even for targets that
            # do not include one in their default query params (buckets).
            params["generation"] = self.generation

        request_headers = self._encryption_headers()
        _add_etag_match_headers(
            request_headers,
            if_etag_match=if_etag_match,
            if_etag_not_match=if_etag_not_match,
        )

        response = client._get_resource(
            self.path,
            query_params=params,
            headers=request_headers,
            timeout=timeout,
            retry=retry,
            _target_object=self,
        )
        self._set_properties(response)

    def _patch_property(self, name, value):
        """Set one field locally and remember that it changed.

        Only the given field is touched; the other fields are left alone.

        This does **not** reload the properties from the server. The change
        is local only and is synced via :meth:`patch`.

        :type name: str
        :param name: The field name to update.

        :type value: object
        :param value: The value being updated.
        """
        self._changes.add(name)
        self._properties[name] = value

    def _set_properties(self, value):
        """Replace the local properties wholesale.

        :type value: dict or :class:`google.cloud.storage.batch._FutureDict`
        :param value: The properties to be set.
        """
        self._properties = value
        # Replacing every value invalidates the record of pending changes.
        self._changes = set()

    def patch(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        override_unlocked_retention=False,
    ):
        """Send only the locally changed properties in a PATCH request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        params = self._query_params
        # '?projection=full' is used because 'PATCH' is documented not to
        # work properly with 'noAcl'.
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention
        generation_conditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **generation_conditions)

        # Collect the current value of every field recorded as changed.
        changed_fields = {}
        for field in self._changes:
            changed_fields[field] = self._properties[field]

        response = client._patch_resource(
            self.path,
            changed_fields,
            query_params=params,
            _target_object=self,
            timeout=timeout,
            retry=retry,
        )
        self._set_properties(response)

    def update(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
        override_unlocked_retention=False,
    ):
        """Send the full set of local properties in a PUT request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        client = self._require_client(client)

        params = self._query_params
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention
        generation_conditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **generation_conditions)

        response = client._put_resource(
            self.path,
            self._properties,
            query_params=params,
            timeout=timeout,
            retry=retry,
            _target_object=self,
        )
        self._set_properties(response)

492 

493 

494def _scalar_property(fieldname): 

495 """Create a property descriptor around the :class:`_PropertyMixin` helpers.""" 

496 

497 def _getter(self): 

498 """Scalar property getter.""" 

499 return self._properties.get(fieldname) 

500 

501 def _setter(self, value): 

502 """Scalar property setter.""" 

503 self._patch_property(fieldname, value) 

504 

505 return property(_getter, _setter) 

506 

507 

508def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192): 

509 """Read blocks from a buffer and update a hash with them. 

510 

511 :type buffer_object: bytes buffer 

512 :param buffer_object: Buffer containing bytes used to update a hash object. 

513 

514 :type hash_obj: object that implements update 

515 :param hash_obj: A hash object (MD5 or CRC32-C). 

516 

517 :type digest_block_size: int 

518 :param digest_block_size: The block size to write to the hash. 

519 Defaults to 8192. 

520 """ 

521 block = buffer_object.read(digest_block_size) 

522 

523 while len(block) > 0: 

524 hash_obj.update(block) 

525 # Update the block for the next iteration. 

526 block = buffer_object.read(digest_block_size) 

527 

528 

def _base64_md5hash(buffer_object):
    """Compute the base64-encoded MD5 digest of a buffer's contents.

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to compute an MD5
                          hash (as base64).

    :rtype: bytes
    :returns: The result of ``base64.b64encode`` applied to the MD5 digest.
    """
    md5_hash = md5()
    _write_buffer_to_hash(buffer_object, md5_hash)
    return base64.b64encode(md5_hash.digest())

543 

544 

def _add_etag_match_headers(headers, **match_parameters):
    """Add etag match headers into the given headers dict.

    Each supplied, non-``None`` parameter is written under its HTTP header
    name. A single string is sent as-is; any other iterable of strings is
    joined with ``", "``.

    :type headers: dict
    :param headers: Headers dict; updated in place.

    :type match_parameters: dict
    :param match_parameters: if*etag*match parameters to add.
    """
    for keyword, header_name in _ETAG_MATCH_PARAMETERS:
        etags = match_parameters.get(keyword)
        if etags is None:
            continue

        # Wrap a lone string so it is not joined character by character.
        etag_list = [etags] if isinstance(etags, str) else etags
        headers[header_name] = ", ".join(etag_list)

561 

562 

def _add_generation_match_parameters(parameters, **match_parameters):
    """Add generation match parameters into the given parameters list or dict.

    Each supplied, non-``None`` parameter is stored under its camelCase name:
    appended as a ``(name, value)`` tuple to a list, or assigned as a key of
    a dict.

    :type parameters: list or dict
    :param parameters: Parameters list or dict; updated in place.

    :type match_parameters: dict
    :param match_parameters: if*generation*match parameters to add.

    :raises: :exc:`ValueError` if a match parameter is set and ``parameters``
             is not a ``list()`` or a ``dict()``.
    """
    for keyword, query_name in _GENERATION_MATCH_PARAMETERS:
        condition = match_parameters.get(keyword)
        if condition is None:
            continue

        if isinstance(parameters, list):
            parameters.append((query_name, condition))
        elif isinstance(parameters, dict):
            parameters[query_name] = condition
        else:
            raise ValueError(
                "`parameters` argument should be a dict() or a list()."
            )

589 

590 

591def _raise_if_more_than_one_set(**kwargs): 

592 """Raise ``ValueError`` exception if more than one parameter was set. 

593 

594 :type error: :exc:`ValueError` 

595 :param error: Description of which fields were set 

596 

597 :raises: :class:`~ValueError` containing the fields that were set 

598 """ 

599 if sum(arg is not None for arg in kwargs.values()) > 1: 

600 escaped_keys = [f"'{name}'" for name in kwargs.keys()] 

601 

602 keys_but_last = ", ".join(escaped_keys[:-1]) 

603 last_key = escaped_keys[-1] 

604 

605 msg = f"Pass at most one of {keys_but_last} and {last_key}" 

606 

607 raise ValueError(msg) 

608 

609 

610def _bucket_bound_hostname_url(host, scheme=None): 

611 """Helper to build bucket bound hostname URL. 

612 

613 :type host: str 

614 :param host: Host name. 

615 

616 :type scheme: str 

617 :param scheme: (Optional) Web scheme. If passed, use it 

618 as a scheme in the result URL. 

619 

620 :rtype: str 

621 :returns: A bucket bound hostname URL. 

622 """ 

623 url_parts = urlsplit(host) 

624 if url_parts.scheme and url_parts.netloc: 

625 return host 

626 

627 return f"{scheme}://{host}" 

628 

629 

630def _get_invocation_id(): 

631 return "gccl-invocation-id/" + str(uuid4()) 

632 

633 

def _get_default_headers(
    user_agent,
    content_type="application/json; charset=UTF-8",
    x_upload_content_type=None,
    command=None,
):
    """Get the headers for a request.

    :type user_agent: str
    :param user_agent: The user-agent for requests.

    :type content_type: str
    :param content_type: (Optional) Value of the ``content-type`` header.

    :type x_upload_content_type: str
    :param x_upload_content_type:
        (Optional) Value of the ``x-upload-content-type`` header. When falsy,
        ``content_type`` is used instead.

    :type command: str
    :param command:
        (Optional) Information about which interface for the operation was
        used, to be included in the X-Goog-API-Client header. Please leave
        as None unless otherwise directed.

    :rtype: dict
    :returns: The headers to be used for the request.
    """
    # X-Goog-API-Client is the user agent, a fresh invocation ID and, when
    # given, the command tag, separated by single spaces.
    api_client = f"{user_agent} {_get_invocation_id()}"
    command_suffix = f" gccl-gcs-cmd/{command}" if command else ""

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "User-Agent": user_agent,
        "X-Goog-API-Client": api_client + command_suffix,
        "content-type": content_type,
        "x-upload-content-type": x_upload_content_type or content_type,
    }
    return headers