Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/bigquery/retry.py: 59%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

49 statements  

1# Copyright 2018 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15from google.api_core import exceptions 

16from google.api_core import retry 

17import google.api_core.future.polling 

18from google.auth import exceptions as auth_exceptions # type: ignore 

19import requests.exceptions 

20 

21 

# Structured error reasons (the ``reason`` of the first entry in
# ``exc.errors``) that ``_should_retry`` treats as retryable.
_RETRYABLE_REASONS = frozenset(
    ("rateLimitExceeded", "backendError", "internalError", "badGateway")
)

25 

# Exception types that ``_should_retry`` treats as retryable when the exception
# carries no structured ``errors`` entries (e.g. error returns from GFE).
_UNSTRUCTURED_RETRYABLE_TYPES = (
    ConnectionError,  # the built-in; the ``requests`` variant is listed below
    exceptions.TooManyRequests,
    exceptions.InternalServerError,
    exceptions.BadGateway,
    exceptions.ServiceUnavailable,
    requests.exceptions.ChunkedEncodingError,
    requests.exceptions.ConnectionError,
    requests.exceptions.Timeout,
    auth_exceptions.TransportError,
)

37 

_DEFAULT_RETRY_DEADLINE = 60.0 * 10.0  # 10 minutes

# Ambiguous errors (internalError, backendError, rateLimitExceeded, ...) are
# retried for the whole `_DEFAULT_RETRY_DEADLINE`, because the
# `jobs.getQueryResults` REST API reports a job failure as an HTTP error.
#
# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate
# whether `QueryJob.result()` can fail early on ambiguous errors coming from
# its call to the `jobs.getQueryResults` API.
#
# `_DEFAULT_JOB_DEADLINE` has to be a multiple of `_DEFAULT_RETRY_DEADLINE` so
# that a few job retries still fit after the retry timeout is reached.
#
# More precisely, it should be a multiple of (2 * _DEFAULT_RETRY_DEADLINE):
# after an ambiguous exception, the first `job_retry()` call only refreshes
# the job state, and it is the second `job_retry()` call that actually
# restarts the query. See the comments next to `restart_query_job = True` in
# the inner `is_job_done()` function of `QueryJob.result()` for details.
_DEFAULT_JOB_DEADLINE = (2.0 * _DEFAULT_RETRY_DEADLINE) * 2.0

59 

60 

def _should_retry(exc):
    """Predicate for determining when to retry.

    An exception is retried in exactly two situations:

    * It carries structured error details (a non-empty ``errors`` attribute)
      and the ``reason`` of the first entry is one of ``_RETRYABLE_REASONS``.
    * It carries no structured error details and is an instance of one of
      ``_UNSTRUCTURED_RETRYABLE_TYPES`` (e.g. an error returned by GFE).

    Args:
        exc: The exception raised by the API call.

    Returns:
        bool: ``True`` if the call should be retried, ``False`` otherwise.
    """
    # ``getattr`` + truthiness covers a missing attribute, ``None`` and an
    # empty list alike, so ``len()`` is never called on a non-sequence.
    errors = getattr(exc, "errors", None)
    if not errors:
        # Check for unstructured error returns, e.g. from GFE
        return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES)

    # An entry without a "reason" key is treated as non-retryable instead of
    # raising ``KeyError`` from inside the predicate, which would hide the
    # original exception from the caller.
    reason = errors[0].get("reason")
    return reason in _RETRYABLE_REASONS

73 

74 

# Built from the ``_should_retry`` predicate, with
# ``_DEFAULT_RETRY_DEADLINE`` as its deadline.
DEFAULT_RETRY = retry.Retry(
    predicate=_should_retry,
    deadline=_DEFAULT_RETRY_DEADLINE,
)
"""The default retry object.

Any method with a ``retry`` parameter will be retried automatically,
with reasonable defaults. To disable retry, pass ``retry=None``.
To modify the default retry behavior, call a ``with_XXX`` method
on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds,
pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
"""

84 

85 

def _should_retry_get_job_conflict(exc):
    """Predicate for retrying a jobs.get call issued after a conflict error.

    A 404 is sometimes returned right after a Conflict. Retrying that 404 is
    very likely to (eventually) recover the job, so ``NotFound`` is accepted
    here in addition to everything ``_should_retry`` accepts.
    https://github.com/googleapis/python-bigquery/issues/2134

    Note: once https://github.com/googleapis/python-api-core/issues/796 makes
    it possible to tweak the predicate of an existing Retry object, this may
    be extended to user-specified predicates.
    """
    if isinstance(exc, exceptions.NotFound):
        return True
    return _should_retry(exc)

99 

100 

# Pick a deadline smaller than our other deadlines since we want to timeout
# before those expire. The value used is one third of
# ``_DEFAULT_RETRY_DEADLINE``.
_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0
# Retry object pairing the conflict-aware predicate with that shorter deadline.
_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry(
    predicate=_should_retry_get_job_conflict,
    deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE,
)
"""Private, may be removed in future."""

109 

110 

# Note: Take care when updating DEFAULT_TIMEOUT to anything but None. A
# default timeout was briefly in place, but even a value of more than twice
# the theoretical server-side default timeout of 2 minutes was not enough for
# complex queries. See:
# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647
DEFAULT_TIMEOUT = None
"""The default API timeout.

This is the time to wait per request. To adjust the total wait time, set a
deadline on the retry object.
"""

122 

# Structured error reasons for which ``_job_should_retry`` returns True.
job_retry_reasons = (
    "rateLimitExceeded", "backendError", "internalError",
    "jobBackendError", "jobInternalError", "jobRateLimitExceeded",
)

131 

132 

def _job_should_retry(exc):
    """Predicate deciding whether a failed query job attempt is retried.

    Args:
        exc: The exception to classify. A ``RetryError`` is unwrapped to its
            ``cause`` before classification.

    Returns:
        bool: ``True`` when ``_should_retry`` accepts the exception, or when
        the ``reason`` of its first structured error is listed in
        ``job_retry_reasons``; ``False`` otherwise.
    """
    # Some errors are ambiguous: 'backendError', for instance, may stem from
    # an API problem or from a job problem. Our is_job_done() function must be
    # retried for those.
    #
    # Note: The job itself is only restarted when we know for sure that the
    # job status is the cause and restart_query_job = True was set in that
    # loop. As a consequence this predicate may be called twice for the same
    # job, through different paths: (1) from a jobs.getQueryResults
    # RetryError and (2) from translating the job error found in the body of
    # a jobs.get response.
    #
    # Note: If job types other than queries start being retried, where the
    # problematic getQueryResults API is not used to check the status, a
    # different predicate is needed, as there shouldn't be ambiguous errors
    # in those cases.
    error = exc.cause if isinstance(exc, exceptions.RetryError) else exc

    # Per https://github.com/googleapis/python-bigquery/issues/1929, retriable
    # errors sometimes end up here. The separate `restart_query_job` logic
    # keeps non-failed jobs from being restarted, so it should be safe to
    # carry on instead of giving up on waiting for the query to complete.
    if _should_retry(error):
        return True

    if hasattr(error, "errors") and len(error.errors) != 0:
        return error.errors[0]["reason"] in job_retry_reasons
    return False

164 

165 

# Built from the ``_job_should_retry`` predicate, with
# ``_DEFAULT_JOB_DEADLINE`` as its deadline.
DEFAULT_JOB_RETRY = retry.Retry(
    predicate=_job_should_retry,
    deadline=_DEFAULT_JOB_DEADLINE,
)
"""
The default job retry object.
"""

172 

173 

def _query_job_insert_should_retry(exc):
    """Predicate deciding whether inserting a query job is retried.

    Args:
        exc: The exception to classify. A ``RetryError`` is unwrapped to its
            ``cause`` before classification.

    Returns:
        bool: For a ``NotFound`` error, ``True`` only when its message
        mentions a job; for anything else, whatever ``_job_should_retry``
        returns.
    """
    error = exc.cause if isinstance(exc, exceptions.RetryError) else exc

    if not isinstance(error, exceptions.NotFound):
        return _job_should_retry(error)

    # Per https://github.com/googleapis/python-bigquery/issues/2134, a 404 is
    # sometimes returned. Having got this far, assume the job doesn't actually
    # exist and try again. 404 can't simply be added to the default job_retry,
    # because it is also raised for errors like "this table does not exist",
    # which a retry probably won't resolve.
    text = error.message
    if text is None:
        return False

    # Only "job not found" is retried, not table/dataset not found. The URL
    # contains "jobs", so the leading whitespace is what disambiguates.
    return " job" in text.lower()

190 

191 

# jobs.insert returns without waiting for the job to complete, so the shorter
# _DEFAULT_RETRY_DEADLINE is enough here; the long _DEFAULT_JOB_DEADLINE is
# not needed for this part.
_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry(
    predicate=_query_job_insert_should_retry, deadline=_DEFAULT_RETRY_DEADLINE
)
"""Private, may be removed in future."""

199 

200 

# Timeout used by default for Client.get_job().
# NOTE(review): presumably expressed in seconds — confirm against the caller.
DEFAULT_GET_JOB_TIMEOUT = 128
"""
Default timeout for Client.get_job().
"""

# Re-exports the private ``_DEFAULT_VALUE`` attribute of ``PollingFuture``.
# NOTE(review): presumably a "no value supplied" sentinel — confirm in api_core.
POLLING_DEFAULT_VALUE = google.api_core.future.polling.PollingFuture._DEFAULT_VALUE
"""
Default value defined in google.api_core.future.polling.PollingFuture.
"""

209"""