Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/storage/_media/_helpers.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

120 statements  

1# Copyright 2017 Google Inc. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Shared utilities used by both downloads and uploads.""" 

16 

17from __future__ import absolute_import 

18 

19import base64 

20import hashlib 

21import logging 

22 

23from urllib.parse import parse_qs 

24from urllib.parse import urlencode 

25from urllib.parse import urlsplit 

26from urllib.parse import urlunsplit 

27 

28from google.cloud.storage import retry 

29from google.cloud.storage.exceptions import InvalidResponse 

30 

31 

# Lower-case names of the HTTP headers read by the helpers in this module.
RANGE_HEADER = "range"
CONTENT_RANGE_HEADER = "content-range"
CONTENT_ENCODING_HEADER = "content-encoding"
_GENERATION_HEADER = "x-goog-generation"
_HASH_HEADER = "x-goog-hash"
_STORED_CONTENT_ENCODING_HEADER = "x-goog-stored-content-encoding"

# Warning text describing the slow pure-Python crcmod implementation.
_SLOW_CRC32C_WARNING = (
    "Currently using crcmod in pure python form. This is a slow "
    "implementation. Python 3 has a faster implementation, `google-crc32c`, "
    "which will be used if it is installed."
)

# Log message template: the positional field receives the media URL and
# ``checksum_type`` receives the (upper-cased) checksum name.
_MISSING_CHECKSUM = """\
No {checksum_type} checksum was returned from the service while downloading {}
(which happens for composite objects), so client-side content integrity
checking is not being performed."""

# Module-level logger.
_LOGGER = logging.getLogger(__name__)

50 

51 

def do_nothing():
    """No-op callback used as the default ``callback`` argument.

    Takes no arguments and returns ``None``.
    """
    return None

54 

55 

def header_required(response, name, get_headers, callback=do_nothing):
    """Return the value of a header that must be present on a response.

    Args:
        response (object): An HTTP response object; it is only ever passed
            to ``get_headers`` and to the raised exception.
        name (str): The name of the required header.
        get_headers (Callable[Any, Mapping[str, str]]): Helper that extracts
            the headers mapping from ``response``.
        callback (Optional[Callable]): A zero-argument callable invoked just
            before the exception is raised.

    Returns:
        str: The value stored under ``name`` in the response headers.

    Raises:
        ~google.cloud.storage.exceptions.InvalidResponse: If ``name`` is not
            among the response headers.
    """
    response_headers = get_headers(response)
    if name in response_headers:
        return response_headers[name]

    # The header is missing: notify the caller first, then fail.
    callback()
    raise InvalidResponse(response, "Response headers must contain header", name)

81 

82 

def require_status_code(response, status_codes, get_status_code, callback=do_nothing):
    """Check that a response's status code is one of the accepted values.

    Args:
        response (object): The HTTP response object.
        status_codes (tuple): The acceptable status codes.
        get_status_code (Callable[Any, int]): Helper that extracts the status
            code from ``response``.
        callback (Optional[Callable]): A zero-argument callable. It is invoked
            before raising, but only when the status code is not listed in
            ``retry._RETRYABLE_STATUS_CODES``.

    Returns:
        int: The status code.

    Raises:
        ~google.cloud.storage.exceptions.InvalidResponse: If the status code
            is not one of the values in ``status_codes``.
    """
    code = get_status_code(response)
    if code in status_codes:
        return code

    # Retryable codes skip the callback; the exception is raised either way.
    if code not in retry._RETRYABLE_STATUS_CODES:
        callback()
    raise InvalidResponse(
        response,
        "Request failed with status code",
        code,
        "Expected one of",
        *status_codes
    )

113 

114 

def _get_metadata_key(checksum_type):
    """Map a checksum type to its metadata key.

    ``"md5"`` maps to ``"md5Hash"``; any other value is returned unchanged.
    """
    return "md5Hash" if checksum_type == "md5" else checksum_type

120 

121 

def prepare_checksum_digest(digest_bytestring):
    """Encode a raw checksum digest for use in an HTTP header.

    Args:
        digest_bytestring (bytes): A checksum digest bytestring.

    Returns:
        str: The base64 encoding of the input, as text.
    """
    # ``b64encode`` yields ``bytes``; decode because HTTP headers expect ``str``.
    return base64.b64encode(digest_bytestring).decode("utf-8")

134 

135 

def _get_expected_checksum(response, get_headers, media_url, checksum_type):
    """Get the expected checksum and a checksum object for a download response.

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.
        media_url (str): The URL containing the media to be downloaded; only
            used in the log message emitted when no checksum is found.
        checksum_type Optional(str): The checksum type to read from the
            headers, exactly as it will appear in the headers
            (case-sensitive). Must be "md5", "crc32c" or None.

    Returns:
        Tuple (Optional[str], object): The expected checksum read from the
            ``X-Goog-Hash`` header (or None), and the checksum object to
            use. A ``_DoNothingHash`` is returned when ``checksum_type`` is
            None or when the header carries no checksum of that type.

    Raises:
        ValueError: If ``checksum_type`` is not "md5", "crc32c" or None.
    """
    if checksum_type not in ("md5", "crc32c", None):
        raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")

    # No checksum requested: nothing to look up, nothing to compute.
    if checksum_type not in ("md5", "crc32c"):
        return (None, _DoNothingHash())

    response_headers = get_headers(response)
    expected_checksum = _parse_checksum_header(
        response_headers.get(_HASH_HEADER), response, checksum_label=checksum_type
    )
    if expected_checksum is not None:
        return (expected_checksum, _get_checksum_object(checksum_type))

    # The service sent no checksum of this type: log it and skip hashing.
    _LOGGER.info(
        _MISSING_CHECKSUM.format(media_url, checksum_type=checksum_type.upper())
    )
    return (expected_checksum, _DoNothingHash())

173 

174 

def _get_uploaded_checksum_from_headers(response, get_headers, checksum_type):
    """Read the checksum of the requested type from the response headers.

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.
        checksum_type Optional(str): The checksum type to read from the
            headers, exactly as it will appear in the headers
            (case-sensitive). Must be "md5", "crc32c" or None.

    Returns:
        Optional[str]: The checksum found in the ``X-Goog-Hash`` header, or
            None when ``checksum_type`` is None or the header carries no
            checksum of that type.

    Raises:
        ValueError: If ``checksum_type`` is not "md5", "crc32c" or None.
    """
    if checksum_type not in ("md5", "crc32c", None):
        raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")

    # No checksum type requested, so the headers are never consulted.
    if checksum_type not in ("md5", "crc32c"):
        return None

    response_headers = get_headers(response)
    return _parse_checksum_header(
        response_headers.get(_HASH_HEADER), response, checksum_label=checksum_type
    )

201 

202 

def _parse_checksum_header(header_value, response, checksum_label):
    """Parses the checksum header from an ``X-Goog-Hash`` value.

    .. _header reference: https://cloud.google.com/storage/docs/xml-api/reference-headers#xgooghash

    Expects ``header_value`` (if not :data:`None`) to be in one of the three
    following formats:

    * ``crc32c=n03x6A==``
    * ``md5=Ojk9c3dhfxgoKVVHYwFbHQ==``
    * ``crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ==``

    Segments that contain no ``=`` (an empty header value, a trailing comma)
    are ignored rather than treated as an error, and whitespace around a
    label is tolerated.

    See the `header reference`_ for more information.

    Args:
        header_value (Optional[str]): The ``X-Goog-Hash`` header from
            a download response.
        response (~requests.Response): The HTTP response object; only used
            when building the exception.
        checksum_label (str): The label of the header value to read, as in the
            examples above. Typically "md5" or "crc32c"

    Returns:
        Optional[str]: The expected checksum of the response, if it
        can be detected from the ``X-Goog-Hash`` header; otherwise, None.

    Raises:
        ~google.cloud.storage.exceptions.InvalidResponse: If there are
            multiple checksums of the requested type in ``header_value``.
    """
    if header_value is None:
        return None

    matches = []
    for checksum in header_value.split(","):
        # Split on the first "=" only: base64 values end in "=" padding.
        name, separator, value = checksum.partition("=")
        if not separator:
            # Not a ``label=value`` pair (empty or malformed segment); skip it.
            continue
        # Official docs say "," is the separator, but real-world responses
        # have encountered ", ", so ignore whitespace around the label.
        if name.strip() == checksum_label:
            matches.append(value)

    if not matches:
        return None
    if len(matches) == 1:
        return matches[0]

    raise InvalidResponse(
        response,
        "X-Goog-Hash header had multiple ``{}`` values.".format(checksum_label),
        header_value,
        matches,
    )

254 

255 

def _get_checksum_object(checksum_type):
    """Build a fresh checksum object for ``checksum_type``.

    Returns ``hashlib.md5()`` for "md5", ``google_crc32c.Checksum()`` for
    "crc32c", and None when ``checksum_type`` is None.

    Raises ValueError if checksum_type is unsupported.
    """
    if checksum_type == "md5":
        return hashlib.md5()

    if checksum_type == "crc32c":
        # Imported on demand so that platforms without google_crc32c can
        # still use the rest of this module.
        import google_crc32c

        return google_crc32c.Checksum()

    if checksum_type is None:
        return None

    raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")

273 

274 

def _is_crc32c_available_and_fast():
    """Report whether google_crc32c is importable with its C implementation.

    Returns True only when ``google_crc32c`` imports successfully and its
    ``implementation`` attribute equals ``"c"``. Any exception raised while
    probing (including a failed import) yields False.
    """
    try:
        import google_crc32c

        return google_crc32c.implementation == "c"
    except Exception:
        # Best-effort probe: any failure means "not available and fast".
        return False

289 

290 

def _parse_generation_header(response, get_headers):
    """Read the object generation from the ``X-Goog-Generation`` header.

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.

    Returns:
        Optional[int]: The header value converted with ``int``, or None when
            the header is absent.
    """
    raw_generation = get_headers(response).get(_GENERATION_HEADER, None)
    return None if raw_generation is None else int(raw_generation)

309 

310 

def _get_generation_from_url(media_url):
    """Extract the ``generation`` query parameter from a media URL.

    Args:
        media_url (str): The URL containing the media to be downloaded.

    Returns:
        Optional[int]: The first ``generation`` value in the query string,
            converted with ``int``; None when the parameter is absent.
    """
    generation_values = parse_qs(urlsplit(media_url).query).get("generation", None)
    if generation_values is None:
        return None

    # ``parse_qs`` maps each name to a list of values; use the first one.
    return int(generation_values[0])

329 

330 

def add_query_parameters(media_url, query_params):
    """Add query parameters to a base url.

    Parameters already present in ``media_url`` are preserved, including
    those with blank values (e.g. ``?a=``). When a name appears both in the
    URL and in ``query_params``, the value from ``query_params`` wins.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        query_params (dict): Names and values of the query parameters to add.
            An empty (or None) mapping leaves the URL untouched.

    Returns:
        str: URL with additional query strings appended.
    """
    if not query_params:
        return media_url

    scheme, netloc, path, query, frag = urlsplit(media_url)
    # ``parse_qs`` drops blank-valued parameters by default, which would
    # silently remove them from the rebuilt URL; keep them instead.
    params = parse_qs(query, keep_blank_values=True)
    new_params = {**params, **query_params}
    # ``doseq=True`` expands the value lists produced by ``parse_qs``.
    query = urlencode(new_params, doseq=True)
    return urlunsplit((scheme, netloc, path, query, frag))

350 

351 

def _is_decompressive_transcoding(response, get_headers):
    """Tell whether the object was served decompressed.

    That is the case when the "x-goog-stored-content-encoding" header is
    "gzip" while the "content-encoding" header is not "gzip". See more at:
    https://cloud.google.com/storage/docs/transcoding#transcoding_and_gzip

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.

    Returns:
        bool: True if decompressive transcoding has occurred; otherwise, False.
    """
    response_headers = get_headers(response)
    # Not stored gzip-compressed, so there was nothing to transcode.
    if response_headers.get(_STORED_CONTENT_ENCODING_HEADER) != "gzip":
        return False
    return response_headers.get(CONTENT_ENCODING_HEADER) != "gzip"

367 

368 

369class _DoNothingHash(object): 

370 """Do-nothing hash object. 

371 

372 Intended as a stand-in for ``hashlib.md5`` or a crc32c checksum 

373 implementation in cases where it isn't necessary to compute the hash. 

374 """ 

375 

376 def update(self, unused_chunk): 

377 """Do-nothing ``update`` method. 

378 

379 Intended to match the interface of ``hashlib.md5`` and other checksums. 

380 

381 Args: 

382 unused_chunk (bytes): A chunk of data. 

383 """