Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/gradient_checker_v2.py: 15%

136 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradient checker for functions.

The gradient checker verifies numerically that a function properly
computes the gradients.
"""
import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import indexed_slices
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y


def _eval_indexed_slices(a):
  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.

  When eager execution is enabled, converts IndexedSlices
  to IndexedSlicesValue with numpy indices/values.

  Args:
    a: any value.

  Returns:
    If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
    fields. Otherwise returns a unchanged.
  """
  if (isinstance(a, indexed_slices.IndexedSlices) and
      context.executing_eagerly()):
    return indexed_slices.IndexedSlicesValue(
        indices=[x.numpy() for x in a.indices],
        values=[x.numpy() for x in a.values],
        dense_shape=a.dense_shape)
  return a


def _to_numpy(a):
  """Converts Tensors, EagerTensors, and IndexedSlicesValue to numpy arrays.

  Args:
    a: any value.

  Returns:
    If a is EagerTensor or Tensor, returns the evaluation of a by calling
    numpy() or run(). If a is IndexedSlicesValue, constructs the corresponding
    dense numpy array. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.EagerTensor):
    return a.numpy()
  if isinstance(a, ops.Tensor):
    sess = ops.get_default_session()
    return sess.run(a)
  if isinstance(a, indexed_slices.IndexedSlicesValue):
    arr = np.zeros(a.dense_shape)
    assert len(a.values) == len(a.indices), (
        "IndexedSlicesValue has %s value slices but %s indices\n%s" %
        (a.values, a.indices, a))
    for values_slice, index in zip(a.values, a.indices):
      assert 0 <= index < len(arr), (
          "IndexedSlicesValue has invalid index %s\n%s" % (index, a))
      arr[index] += values_slice
    return arr
  return a
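
# Illustrative sketch (not part of the original module): how _to_numpy
# densifies an IndexedSlicesValue. With indices [0, 2], value slices
# [[1., 1.], [3., 3.]] and dense_shape [4, 2], the result is a (4, 2) array
# whose rows 0 and 2 hold the value slices and whose remaining rows stay zero:
#
#   sparse = indexed_slices.IndexedSlicesValue(
#       indices=np.array([0, 2]),
#       values=np.array([[1., 1.], [3., 3.]]),
#       dense_shape=np.array([4, 2]))
#   dense = _to_numpy(sparse)  # shape (4, 2); rows 1 and 3 remain zero.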


def _prepare(f, xs_dtypes, xs_shapes):
  """Return a function that executes 'f'.

  In TF 2.x, this is the same as `f`.
  In TF 1.x, returns a Python function that executes the graph defined by `f`
  in a Session.

  Args:
    f: the function.
    xs_dtypes: dtypes of f's arguments.
    xs_shapes: shapes of f's arguments.

  Returns:
    A function that accepts the arguments of `f` as numpy values and executes
    `f`, either directly (eager) or by running the captured graph in the
    default session (graph mode).
  """
  if context.executing_eagerly():

    def decorated_eager(*xs_data):
      return f(*map(ops.convert_to_tensor, xs_data))

    return decorated_eager
  xs = [
      array_ops.placeholder(x_dtype, shape=x_shape)
      for x_dtype, x_shape in zip(xs_dtypes, xs_shapes)
  ]
  y = f(*xs)
  sess = ops.get_default_session()

  def decorated_graph(*xs_data):
    xs_data = [_to_numpy(a) for a in xs_data]
    return sess.run(y, feed_dict=dict(zip(xs, xs_data)))

  return decorated_graph
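
# Illustrative note (not part of the original module): under eager execution
# the wrapper returned by _prepare just converts its numpy arguments to
# tensors and calls `f`; in graph mode it feeds them into the placeholders
# built above and runs the captured graph in the default session. A minimal
# use, assuming eager execution:
#
#   def f(a, b):
#     return a * b
#   wrapped = _prepare(f, [dtypes.float32, dtypes.float32], [(2,), (2,)])
#   wrapped(np.ones(2, np.float32), np.ones(2, np.float32))  # same as f(a, b)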


def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian for f regarding xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with y dimensions smashed into the first
  # dimension and x dimensions smashed into the second.
  jacobian = np.zeros((y_size, x_size), dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0, then
  # compute the gradients -- this gives us one row of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [z.dtype for z in xs],
                     [None] + [z.shape for z in xs])
  for row in range(y_size):
    dy_data_flat[row] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    grad = _eval_indexed_slices(grad)
    if isinstance(grad, indexed_slices.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        c_begin = i * x_val_size
        c_end = c_begin + x_val_size
        jacobian[row, c_begin:c_end] += v.flat
    elif grad is not None:
      jacobian[row, :] = grad.ravel().view(jacobian.dtype)
    # This reset of `dy_data_flat` needs to happen after `grad` is copied to
    # `jacobian` because `grad` and `dy_data_flat` may share memory.
    dy_data_flat[row] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian
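
# Illustrative sketch (not part of the original module): for f(x) = x * x with
# x = [1., 2.], the exact Jacobian dy/dx is diag([2., 4.]), so the call below
# should return [[2., 0.], [0., 4.]] as a 2x2 float32 array:
#
#   x = ops.convert_to_tensor(np.array([1., 2.], np.float32))
#   jacobian = _compute_theoretical_jacobian(
#       lambda t: t * t, x.shape, dtypes.float32, [x], param=0)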


def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
  """Computes the numeric Jacobian for f regarding xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_size: the number of elements of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.
    delta: the amount of perturbation we give to the input.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.
  """
  x_shape = xs[param].shape
  x_dtype = xs[param].dtype

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x_dtype.is_complex else 1)
  y_size = y_size * (2 if y_dtype.is_complex else 1)
  x_dtype = x_dtype.real_dtype.as_numpy_dtype
  y_dtype = y_dtype.real_dtype.as_numpy_dtype

  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  # Converts xs to numpy arrays to do in-place perturbation.
  # Calls asarray() to avoid copying in ravel() later.
  xs = [np.asarray(_to_numpy(x)) for x in xs]
  x = xs[param]

  # Make sure we have the right types.
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((y_size, x_size), dtype=x_dtype)

  # For each entry of x, we slightly perturb it by adding and subtracting a
  # delta, then compute the difference between the outputs. This gives us one
  # column of the Jacobian matrix.
  f = _prepare(f, xs_dtypes, xs_shapes)
  for col in range(x_size):
    original = x.ravel().view(x_dtype)[col]
    x.ravel().view(x_dtype)[col] += delta
    y_pos = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    x.ravel().view(x_dtype)[col] -= delta
    y_neg = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    diff = (y_pos - y_neg) / scale
    jacobian[:, col] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian
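
# Note (not part of the original module): _compute_numeric_jacobian above uses
# a central finite difference; column `col` of the Jacobian is approximated as
#
#   (f(..., x + delta * e_col, ...) - f(..., x - delta * e_col, ...)) / (2 * delta)
#
# where e_col is the col-th standard basis vector. For example, for
# f(x) = x * x at x = 1.0 this gives ((1 + delta)**2 - (1 - delta)**2) /
# (2 * delta) = 4 * delta / (2 * delta) = 2.0, matching the analytic
# derivative exactly for this particular f.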


def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
  """Computes the theoretical and numerical jacobian."""
  x = xs[param]
  t = x.dtype
  allowed_types = [
      dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64,
      dtypes.complex64, dtypes.complex128
  ]
  assert t.base_dtype in allowed_types, ("Cannot compute gradient for "
                                         "unsupported type %s of argument %s" %
                                         (t.name, param))
  t2 = y_dtype
  assert t2.base_dtype in allowed_types, ("Cannot compute gradient for "
                                          "unsupported type %s of y" % t2.name)
  y_size = _product(y_shape)
  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
  return jacob_t, jacob_n


def _compute_gradient_list(f, xs, delta):
  """Compute gradients for a list of x values."""
  # convert xs to tensors so that dtype and shape have uniform types
  xs = [ops.convert_to_tensor(x) for x in xs]
  # run the function to get info of the result
  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  f_temp = _prepare(f, xs_dtypes, xs_shapes)
  y = f_temp(*xs)
  return tuple(
      zip(*[
          _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta)
          for i in range(len(xs))
      ]))
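
# Note (not part of the original module): _compute_gradient returns a
# (theoretical, numeric) pair per argument, so for two arguments the list
# comprehension above yields [(jt_0, jn_0), (jt_1, jn_1)]; the zip(*...) then
# transposes this into ((jt_0, jt_1), (jn_0, jn_1)), i.e. one tuple of
# theoretical Jacobians followed by one tuple of numeric Jacobians.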


@tf_export("test.compute_gradient", v1=[])
def compute_gradient(f, x, delta=None):
  """Computes the theoretical and numeric Jacobian of `f`.

  With y = f(x), computes the theoretical and numeric Jacobian dy/dx.

  Args:
    f: the function.
    x: the arguments for the function as a list or tuple of values convertible
      to a Tensor.
    delta: (optional) perturbation used to compute numeric Jacobian.

  Returns:
    A pair of lists, where the first is a list of 2-d numpy arrays representing
    the theoretical Jacobians for each argument, and the second list is the
    numerical ones. Each 2-d array has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in the
    corresponding argument and "y_size" is the number of elements in f(x).

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
    ValueError: If x is not a list or a tuple.

  Example:

  >>> @tf.function
  ... def test_func(x):
  ...   return x*x
  ...
  >>>
  >>> class MyTest(tf.test.TestCase):
  ...
  ...   def test_gradient_of_test_func(self):
  ...     theoretical, numerical = tf.test.compute_gradient(test_func, [1.0])
  ...     # ((array([[2.]], dtype=float32),),
  ...     #  (array([[2.000004]], dtype=float32),))
  ...     self.assertAllClose(theoretical, numerical)

  """
  if not isinstance(x, (list, tuple)):
    raise ValueError(
        "`x` must be a list or tuple of values convertible to a Tensor "
        "(arguments to `f`), not a %s" % type(x))
  if delta is None:
    # By default, we use a step size for the central finite difference
    # approximation that is exactly representable as a binary floating
    # point number, since this reduces the amount of noise due to rounding
    # in the approximation of some functions.
    delta = 1.0 / 1024
  return _compute_gradient_list(f, x, delta)


def max_error(grad1, grad2):
  """Computes maximum elementwise gap.

  Computes the maximum elementwise gap between two lists of tensors of the same
  shape.

  Args:
    grad1: a list of tensors.
    grad2: a list of tensors with the same shape as grad1.

  Returns:
    The maximum elementwise gap between the two.
  """
  error = 0
  for j_t, j_n in zip(grad1, grad2):
    if j_t.size or j_n.size:  # Handle zero size tensors correctly
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
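

# A minimal usage sketch (not part of the original module), assuming TF 2.x
# eager execution: compare the theoretical and numeric Jacobians of an
# elementwise square and report the largest discrepancy.
if __name__ == "__main__":

  def _square(x):  # Hypothetical example function, for this sketch only.
    return x * x

  _x = np.array([1.0, 2.0, 3.0], dtype=np.float32)
  _theoretical, _numerical = compute_gradient(_square, [_x])
  print("max error:", max_error(_theoretical, _numerical))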