Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/bigquery/retry.py: 58%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

53 statements  

1# Copyright 2018 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15import logging 

16 

17from google.api_core import exceptions 

18from google.api_core import retry 

19import google.api_core.future.polling 

20from google.auth import exceptions as auth_exceptions # type: ignore 

21import requests.exceptions 

22 

# Logger named after this module.
_LOGGER = logging.getLogger(__name__)

# Values of the structured error "reason" field that are retried.
_RETRYABLE_REASONS = frozenset(
    ("rateLimitExceeded", "backendError", "internalError", "badGateway")
)

# Exception types that are retried when an error does not carry a usable
# structured "reason".
_UNSTRUCTURED_RETRYABLE_TYPES = (
    # Built-in connection failures.
    ConnectionError,
    # google.api_core HTTP-level errors.
    exceptions.TooManyRequests,
    exceptions.InternalServerError,
    exceptions.BadGateway,
    exceptions.ServiceUnavailable,
    # Transport failures raised by ``requests``.
    requests.exceptions.ChunkedEncodingError,
    requests.exceptions.ConnectionError,
    requests.exceptions.Timeout,
    # Transport failures raised by ``google.auth``.
    auth_exceptions.TransportError,
)

40 

# Default overall deadline for retrying a single API request.
_DEFAULT_RETRY_DEADLINE = 60.0 * 10.0  # 10 minutes, expressed in seconds

# Ambiguous errors (e.g. internalError, backendError, rateLimitExceeded) are
# retried for the full `_DEFAULT_RETRY_DEADLINE`, because the
# `jobs.getQueryResults` REST API translates a job failure into an HTTP error.
#
# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate
# if we can fail early for ambiguous errors in `QueryJob.result()`'s call to
# the `jobs.getQueryResults` API.
#
# `_DEFAULT_JOB_DEADLINE` needs to be some multiple of
# `_DEFAULT_RETRY_DEADLINE` so that a few job retries are still possible after
# the request-level retry timeout has been reached.
#
# Note: the multiple should actually be a multiple of
# (2 * _DEFAULT_RETRY_DEADLINE). After an ambiguous exception, the first call
# from `job_retry()` only refreshes the job state without restarting the
# query; it is the second `job_retry()` call that restarts the query. For a
# more detailed explanation, see the comments where `restart_query_job = True`
# is set in `QueryJob.result()`'s inner `is_job_done()` function.
_DEFAULT_JOB_DEADLINE = (2.0 * _DEFAULT_RETRY_DEADLINE) * 2.0

62 

63 

def _should_retry(exc):
    """Return whether ``exc`` is an error worth retrying.

    When ``exc.errors[0]["reason"]`` can be read, the error is retried if and
    only if that reason is in ``_RETRYABLE_REASONS``. Otherwise the decision
    falls back to whether ``exc`` is an instance of one of the
    ``_UNSTRUCTURED_RETRYABLE_TYPES``.
    """
    try:
        first_reason = exc.errors[0]["reason"]
    except (AttributeError, IndexError, TypeError, KeyError):
        # No structured reason is available: ``errors`` is missing, empty,
        # not indexable like a list of dicts, or its first entry has no
        # "reason" key (e.g. gRPC exceptions). Decide by exception type.
        _LOGGER.debug("Inspecting unstructured error for retry: %r", exc)
        return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES)
    else:
        return first_reason in _RETRYABLE_REASONS

79 

80 

# Retries errors accepted by ``_should_retry`` until ``_DEFAULT_RETRY_DEADLINE``.
DEFAULT_RETRY = retry.Retry(
    predicate=_should_retry,
    deadline=_DEFAULT_RETRY_DEADLINE,
)
"""The default retry object.

Any method with a ``retry`` parameter will be retried automatically,
with reasonable defaults. To disable retry, pass ``retry=None``.
To modify the default retry behavior, call a ``with_XXX`` method
on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds,
pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
"""

90 

91 

def _should_retry_get_job_conflict(exc):
    """Return whether a ``jobs.get`` call made after a conflict should be retried.

    A 404 is sometimes returned right after a Conflict. In that situation we
    are fairly confident that retrying the 404 will (hopefully) eventually
    find the job, so ``NotFound`` is retried here in addition to everything
    ``_should_retry`` already accepts.
    https://github.com/googleapis/python-bigquery/issues/2134

    Note: this may be extensible to user-specified predicates once
    https://github.com/googleapis/python-api-core/issues/796 makes it possible
    to tweak the predicate of an existing Retry object.
    """
    if isinstance(exc, exceptions.NotFound):
        return True
    return _should_retry(exc)

105 

106 

# This deadline is deliberately shorter than our other deadlines (one third
# of `_DEFAULT_RETRY_DEADLINE`) so that it times out before those expire.
_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0

# Retry object pairing the conflict-aware predicate with the shorter deadline.
_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry(
    predicate=_should_retry_get_job_conflict,
    deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE,
)
"""Private, may be removed in future."""

115 

116 

# Note: be careful about changing DEFAULT_TIMEOUT to anything other than
# None. A default timeout existed briefly, but even a value of more than twice
# the theoretical server-side default timeout of 2 minutes turned out to be
# too short for complex queries. See:
# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647
DEFAULT_TIMEOUT = None
"""The default API timeout.

This is the time to wait per request. To adjust the total wait time, set a
deadline on the retry object.
"""

# Job-level error reasons for which a failed job is retried.
job_retry_reasons = ("jobBackendError", "jobInternalError", "jobRateLimitExceeded")

134 

135 

def _job_should_retry(exc):
    """Predicate deciding whether waiting on a query job should be retried.

    Args:
        exc: The exception raised while waiting for the job. A
            ``RetryError`` is unwrapped to its ``cause`` before inspection.

    Returns:
        ``True`` when ``_should_retry`` accepts the (unwrapped) exception or
        when the ``reason`` of its first structured error is listed in
        ``job_retry_reasons``; ``False`` otherwise, including when no
        structured reason can be read from the exception.
    """
    # Sometimes we have ambiguous errors, such as 'backendError' which could
    # be due to an API problem or a job problem. For these, make sure we retry
    # our is_job_done() function.
    #
    # Note: This won't restart the job unless we know for sure it's because of
    # the job status and set restart_query_job = True in that loop. This means
    # that we might end up calling this predicate twice for the same job
    # but from different paths: (1) from jobs.getQueryResults RetryError and
    # (2) from translating the job error from the body of a jobs.get response.
    #
    # Note: If we start retrying job types other than queries where we don't
    # call the problematic getQueryResults API to check the status, we need
    # to provide a different predicate, as there shouldn't be ambiguous
    # errors in those cases.
    if isinstance(exc, exceptions.RetryError):
        exc = exc.cause

    # Per https://github.com/googleapis/python-bigquery/issues/1929, sometimes
    # retriable errors make their way here. Because of the separate
    # `restart_query_job` logic to make sure we aren't restarting non-failed
    # jobs, it should be safe to continue and not totally fail our attempt at
    # waiting for the query to complete.
    if _should_retry(exc):
        return True

    try:
        reason = exc.errors[0]["reason"]
    except (AttributeError, IndexError, TypeError, KeyError):
        # Mirror the guard used by `_should_retry`: a missing, empty, `None`
        # or otherwise malformed `errors` payload, or a first entry without a
        # "reason" key, means there is no job-level reason to act on. A
        # predicate that raised here would replace the original error with an
        # unrelated KeyError/TypeError, so report "do not retry" instead.
        return False

    return reason in job_retry_reasons

167 

168 

# Retries errors accepted by ``_job_should_retry`` until ``_DEFAULT_JOB_DEADLINE``.
DEFAULT_JOB_RETRY = retry.Retry(
    predicate=_job_should_retry,
    deadline=_DEFAULT_JOB_DEADLINE,
)
"""
The default job retry object.
"""

175 

176 

def _query_job_insert_should_retry(exc):
    """Return whether inserting a query job should be retried after ``exc``.

    A ``RetryError`` is unwrapped to its ``cause`` first. A ``NotFound`` error
    is retried only when its message mentions a job; every other error is
    delegated to ``_job_should_retry``.
    """
    # Per https://github.com/googleapis/python-bigquery/issues/2134, a 404 is
    # sometimes returned. If we get this far, assume the job doesn't actually
    # exist and try again. 404 can't simply be added to the default job_retry
    # because it is also returned for errors like "this table does not exist",
    # which probably won't resolve with a retry.
    error = exc.cause if isinstance(exc, exceptions.RetryError) else exc

    if not isinstance(error, exceptions.NotFound):
        return _job_should_retry(error)

    text = error.message
    if text is None:
        return False

    # Only "job not found" is retried, not table/dataset not found. The URL
    # contains "jobs", so the leading whitespace is used to disambiguate.
    return " job" in text.lower()

193 

194 

# jobs.insert doesn't wait for the job to complete, so the shorter
# _DEFAULT_RETRY_DEADLINE is enough here; the long _DEFAULT_JOB_DEADLINE is
# not needed for this part.
_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry(
    predicate=_query_job_insert_should_retry,
    deadline=_DEFAULT_RETRY_DEADLINE,
)
"""Private, may be removed in future."""

202 

203 

DEFAULT_GET_JOB_TIMEOUT = 128
"""
Default timeout for Client.get_job().
"""

# Re-exported from PollingFuture under a module-level name.
POLLING_DEFAULT_VALUE = (
    google.api_core.future.polling.PollingFuture._DEFAULT_VALUE
)
"""
Default value defined in google.api_core.future.polling.PollingFuture.
"""

212"""