# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
16"""Gradient checker for any ops, graphs.
18The gradient checker verifies numerically that an op/graph properly
19computes the gradients
20"""

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import indexed_slices
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradients
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  """Returns t if t is an int, otherwise the product of the elements of t."""
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y


def _extra_feeds(extra_feed_dict, new_feeds):
  """Merges new_feeds into extra_feed_dict; new_feeds wins on conflicts."""
  if not extra_feed_dict:
    return new_feeds
  r = {}
  r.update(extra_feed_dict)
  r.update(new_feeds)
  return r


def _compute_theoretical_jacobian(x, x_shape, x_data, dy, dy_shape, dx,
                                  extra_feed_dict):
  """Computes the theoretical Jacobian for dy/dx.

  Computes the theoretical Jacobian using the ops generated by
  compute_gradient().

  Args:
    x: the tensor "x".
    x_shape: the dimensions of x as a tuple or an array of ints.
    x_data: a numpy array as the input data for x.
    dy: the tensor "dy".
    dy_shape: the dimensions of dy as a tuple or an array of ints.
    dx: Tensor or IndexedSlices representing dx.
    extra_feed_dict: dict that allows fixing specified tensor values
      during the Jacobian calculation.

  Returns:
    A 2-d numpy array representing the Jacobian for dy/dx. It has "x_size" rows
    and "dy_size" columns where "x_size" is the number of elements in x and
    "dy_size" is the number of elements in dy.

  Raises:
    ValueError: If `dy` is empty but the gradient is nonzero.
  """
  # Complex vectors are treated as vectors of twice as many reals.
  if x.dtype.is_complex:
    x_shape = tuple(x_shape) + (2,)
  dy_factor = 2 if dy.dtype.is_complex else 1

  # To compute the Jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients
  dy_size = _product(dy_shape) * dy_factor

  # Allocate 2-D Jacobian, with x dimensions smashed into the first
  # dimension and y dimensions smashed into the second.
  jacobian = np.zeros((x_size, dy_size),
                      dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0, then
  # compute the backprop -- this gives us one column of the Jacobian matrix.
  dy_data = np.zeros(dy_shape, dtype=dy.dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(dy.dtype.real_dtype.as_numpy_dtype)
  sess = ops.get_default_session()
  for col in range(dy_size):
    dy_data_flat[col] = 1
    if isinstance(dx, indexed_slices.IndexedSlices):
      backprop_indices, backprop_values = sess.run(
          [dx.indices, dx.values],
          feed_dict=_extra_feeds(extra_feed_dict, {x: x_data, dy: dy_data}))
      for i, v in zip(backprop_indices, backprop_values):
        r_begin = i * x_val_size
        r_end = r_begin + x_val_size
        jacobian[r_begin:r_end, col] += v.flat
    else:
      assert isinstance(dx, ops.Tensor), "dx = " + str(dx)
      backprop = sess.run(
          dx, feed_dict=_extra_feeds(extra_feed_dict, {x: x_data, dy: dy_data}))
      jacobian[:, col] = backprop.ravel().view(jacobian.dtype)
    dy_data_flat[col] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if not dy_size:
    backprop = sess.run(
        dx, feed_dict=_extra_feeds(extra_feed_dict, {x: x_data, dy: dy_data}))
    if backprop.shape != x_data.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x_data.shape, backprop.shape))
    if np.any(backprop):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian
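

# --- Illustrative sketch (not part of the original module) -------------------
# The loop above recovers the Jacobian one column at a time: feeding a one-hot
# dy and reading back dx is a vector-Jacobian product. A hypothetical pure-
# NumPy analogue for the linear map y = a @ x, whose true Jacobian in this
# module's (x_size, y_size) layout is a.T:
def _demo_theoretical_jacobian_idea():
  """Sketch: recover d(y)/d(x) for y = a @ x one column at a time."""
  a = np.array([[1., 2.], [3., 4.], [5., 6.]])  # y = a @ x
  x_size, y_size = a.shape[1], a.shape[0]
  jacobian = np.zeros((x_size, y_size))
  for col in range(y_size):
    dy = np.zeros(y_size)
    dy[col] = 1.0                # one-hot dy, as in the loop above
    jacobian[:, col] = a.T @ dy  # "backprop" of the one-hot dy gives dx
  assert np.allclose(jacobian, a.T)
  return jacobian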


def _compute_numeric_jacobian(x, x_shape, x_data, y, y_shape, delta,
                              extra_feed_dict):
  """Computes the numeric Jacobian for dy/dx.

  Computes the numeric Jacobian by slightly perturbing the inputs and
  measuring the differences on the output.

  Args:
    x: the tensor "x".
    x_shape: the dimensions of x as a tuple or an array of ints.
    x_data: a numpy array as the input data for x.
    y: the tensor "y".
    y_shape: the dimensions of y as a tuple or an array of ints.
    delta: the amount of perturbation we give to the input.
    extra_feed_dict: dict that allows fixing specified tensor values
      during the Jacobian calculation.

  Returns:
    A 2-d numpy array representing the Jacobian for dy/dx. It has "x_size" rows
    and "y_size" columns where "x_size" is the number of elements in x and
    "y_size" is the number of elements in y.
  """
  # bfloat16 doesn't have enough bits to represent high precision numbers such
  # as delta. Convert to float32 here. Since numeric_jacobian is expected to
  # be the groundtruth to compare against, it shouldn't lose any information.
  if x.dtype == dtypes.bfloat16:
    x = math_ops.cast(x, dtypes.float32)  # TODO(wangpeng): Now that the new x
    # is an output of the old x, isn't feeding to the new x a mistake?
  if y.dtype == dtypes.bfloat16:
    y = math_ops.cast(y, dtypes.float32)
  if x_data.dtype == dtypes.bfloat16.as_numpy_dtype:
    x_data = x_data.astype(np.float32)

  # To compute the Jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x.dtype.is_complex else 1)
  y_size = _product(y_shape) * (2 if y.dtype.is_complex else 1)
  x_dtype = x.dtype.real_dtype.as_numpy_dtype
  y_dtype = y.dtype.real_dtype.as_numpy_dtype

  # Make sure we have the right types.
  x_data = np.asarray(x_data, dtype=x.dtype.as_numpy_dtype)
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((x_size, y_size), dtype=x_dtype)
  # For each entry of x, we slightly perturb it by adding and subtracting a
  # delta, then compute the difference between the outputs. This gives us one
  # row of the Jacobian matrix.
  for row in range(x_size):
    x_pos = x_data.copy()
    x_neg = x_data.copy()
    x_pos.ravel().view(x_dtype)[row] += delta
    y_pos = y.eval(feed_dict=_extra_feeds(extra_feed_dict, {x: x_pos}))
    x_neg.ravel().view(x_dtype)[row] -= delta
    y_neg = y.eval(feed_dict=_extra_feeds(extra_feed_dict, {x: x_neg}))
    diff = (y_pos - y_neg) / scale
    jacobian[row, :] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian
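

# --- Illustrative sketch (not part of the original module) -------------------
# Row i of the Jacobian above is the central difference
# (f(x + delta * e_i) - f(x - delta * e_i)) / (2 * delta). A hypothetical
# standalone NumPy version for a function f: R^n -> R^m:
def _demo_numeric_jacobian(f, x_data, delta=1e-3):
  """Sketch: central-difference Jacobian with shape (x_size, y_size)."""
  x_data = np.asarray(x_data, dtype=np.float64)
  y_size = np.asarray(f(x_data)).size
  jacobian = np.zeros((x_data.size, y_size))
  for row in range(x_data.size):
    x_pos, x_neg = x_data.copy(), x_data.copy()
    x_pos.ravel()[row] += delta  # perturb one coordinate in each direction
    x_neg.ravel()[row] -= delta
    diff = np.asarray(f(x_pos)) - np.asarray(f(x_neg))
    jacobian[row, :] = diff.ravel() / (2 * delta)
  return jacobian

# E.g. _demo_numeric_jacobian(lambda v: v ** 2, np.array([1.0, 2.0])) is
# approximately np.diag([2.0, 4.0]).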


def _compute_dx_and_dy(x, y, y_shape):
  """Returns a node to compute gradient of y wrt x."""
  # We make up a dy so that we can compute the gradients. We don't really use
  # the value of dy -- we will always feed it. We need to add an identity node
  # so that we can always feed it properly. Otherwise, for the Add operation,
  # dx is the same as dy and we cannot fetch the tensor that we are feeding.
  with x.graph.as_default():
    dy_orig = constant_op.constant(1.0, shape=y_shape, dtype=y.dtype)
    dy = array_ops.identity(dy_orig)
    # We compute the gradients for y wrt. x
    grads = gradients.gradients(y, x, dy)
    assert len(grads) == 1
    return grads[0], dy_orig
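

# --- Illustrative sketch (not part of the original module) -------------------
# A hypothetical graph-mode demo of the returned handles: feeding a one-hot
# dy through dy_orig and fetching dx yields one Jacobian column. For
# y = 2 * x the gradient is dx = 2 * dy, so column 1 comes back as
# [0., 2., 0.]. `_demo_vjp_column` is illustrative only.
def _demo_vjp_column(col=1):
  from tensorflow.python.client import session  # local import for the sketch
  with ops.Graph().as_default() as g:
    x = array_ops.placeholder(dtypes.float32, shape=[3])
    y = math_ops.multiply(x, 2.0)
    dx, dy = _compute_dx_and_dy(x, y, y_shape=[3])
    dy_data = np.zeros([3], dtype=np.float32)
    dy_data[col] = 1.0  # one-hot dy selects one column of the Jacobian
    with session.Session(graph=g) as sess:
      return sess.run(dx, feed_dict={dy: dy_data})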


def _compute_gradient(x,
                      x_shape,
                      dx,
                      y,
                      y_shape,
                      dy,
                      x_init_value=None,
                      delta=1e-3,
                      extra_feed_dict=None):
  """Computes the theoretical and numerical Jacobian."""
  t = dtypes.as_dtype(x.dtype)
  allowed_types = [dtypes.float16, dtypes.bfloat16, dtypes.float32,
                   dtypes.float64, dtypes.complex64, dtypes.complex128]
  assert t.base_dtype in allowed_types, "Don't support type %s for x" % t.name
  t2 = dtypes.as_dtype(y.dtype)
  assert t2.base_dtype in allowed_types, "Don't support type %s for y" % t2.name

  if x_init_value is not None:
    i_shape = list(x_init_value.shape)
    assert list(x_shape) == i_shape, "x_shape = %s, init_data shape = %s" % (
        x_shape, i_shape)
    x_data = x_init_value
  else:
    x_data = np.random.random_sample(x_shape).astype(t.as_numpy_dtype)
    if t.is_complex:
      x_data.imag = np.random.random_sample(x_shape)

  jacob_t = _compute_theoretical_jacobian(
      x, x_shape, x_data, dy, y_shape, dx, extra_feed_dict=extra_feed_dict)
  jacob_n = _compute_numeric_jacobian(
      x, x_shape, x_data, y, y_shape, delta, extra_feed_dict=extra_feed_dict)
  return jacob_t, jacob_n


def _compute_gradient_list(x,
                           x_shape,
                           y,
                           y_shape,
                           x_init_value=None,
                           delta=1e-3,
                           init_targets=None,
                           extra_feed_dict=None):
  """Compute gradients for a list of x values."""
  assert isinstance(x, list)
  dx, dy = zip(*[_compute_dx_and_dy(xi, y, y_shape) for xi in x])

  if init_targets is not None:
    assert isinstance(init_targets, (list, tuple))
    for init in init_targets:
      init.run()
  if x_init_value is None:
    x_init_value = [None] * len(x)
  # pylint: disable=g-complex-comprehension
  ret = [_compute_gradient(xi, x_shapei, dxi, y, y_shape, dyi, x_init_valuei,
                           delta, extra_feed_dict=extra_feed_dict)
         for xi, x_shapei, dxi, dyi, x_init_valuei in zip(x, x_shape, dx, dy,
                                                          x_init_value)]
  return ret


@tf_export(v1=["test.compute_gradient"])
@deprecation.deprecated(
    date=None,
    instructions="Use tf.test.compute_gradient in 2.0, which has better "
    "support for functions. Note that the two versions have different usage, "
    "so code change is needed.")
def compute_gradient(x,
                     x_shape,
                     y,
                     y_shape,
                     x_init_value=None,
                     delta=1e-3,
                     init_targets=None,
                     extra_feed_dict=None):
  """Computes and returns the theoretical and numerical Jacobian.

  If `x` or `y` is complex, the Jacobian will still be real but the
  corresponding Jacobian dimension(s) will be twice as large. This is required
  even if both input and output are complex, since TensorFlow graphs are not
  necessarily holomorphic and may have gradients not expressible as complex
  numbers. For example, if `x` is complex with shape `[m]` and `y` is complex
  with shape `[n]`, each Jacobian `J` will have shape `[m * 2, n * 2]` with

    J[:m, :n] = d(Re y)/d(Re x)
    J[:m, n:] = d(Im y)/d(Re x)
    J[m:, :n] = d(Re y)/d(Im x)
    J[m:, n:] = d(Im y)/d(Im x)

  Args:
    x: a tensor or list of tensors.
    x_shape: the dimensions of x as a tuple or an array of ints. If x is a
      list, then this is the list of shapes.
    y: a tensor.
    y_shape: the dimensions of y as a tuple or an array of ints.
    x_init_value: (optional) a numpy array of the same shape as "x"
      representing the initial value of x. If x is a list, this should be a
      list of numpy arrays. If this is None, the function will pick a random
      tensor as the initial value.
    delta: (optional) the amount of perturbation.
    init_targets: list of targets to run to initialize model params.
    extra_feed_dict: dict that allows fixing specified tensor values
      during the Jacobian calculation.

  Returns:
    Two 2-d numpy arrays representing the theoretical and numerical
    Jacobian for dy/dx. Each has "x_size" rows and "y_size" columns
    where "x_size" is the number of elements in x and "y_size" is the
    number of elements in y. If x is a list, returns a list of such
    (theoretical, numerical) pairs, one per element of x.
  """
  # TODO(mrry): remove argument `init_targets`
  if extra_feed_dict is None:
    extra_feed_dict = {}

  if isinstance(x, list):
    return _compute_gradient_list(x, x_shape, y, y_shape, x_init_value, delta,
                                  init_targets, extra_feed_dict=extra_feed_dict)
  else:
    if init_targets is not None:
      assert isinstance(init_targets, (list, tuple))
      for init in init_targets:
        init.run()
    dx, dy = _compute_dx_and_dy(x, y, y_shape)
    ret = _compute_gradient(x, x_shape, dx, y, y_shape, dy, x_init_value, delta,
                            extra_feed_dict=extra_feed_dict)
    return ret
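

# --- Illustrative usage sketch (not part of the original module) -------------
# A hypothetical TF1 graph-mode example comparing the two Jacobians of
# y = x**2. compute_gradient() must run under an active default session,
# since both Jacobian computations call sess.run / Tensor.eval.
def _demo_compute_gradient():
  from tensorflow.python.client import session  # local import for the sketch
  with ops.Graph().as_default() as g, session.Session(graph=g):
    x = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
    y = math_ops.square(x)
    x_init = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
    jacob_t, jacob_n = compute_gradient(
        x, [2, 2], y, [2, 2], x_init_value=x_init)
    # Both are (4, 4) arrays, approximately diag([2., 4., 6., 8.]).
    return jacob_t, jacob_n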


def _compute_error(grad):
  """Returns the max elementwise |jacob_t - jacob_n| over the given pair(s)."""
  if isinstance(grad, tuple):
    grad = [grad]
  error = 0
  for j_t, j_n in grad:
    if j_t.size or j_n.size:  # Handle zero size tensors correctly
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
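

# --- Illustrative sketch (not part of the original module) -------------------
# _compute_error reduces one (theoretical, numeric) Jacobian pair -- or a
# list of pairs -- to the largest elementwise absolute difference:
def _demo_compute_error():
  """Sketch: two 2x2 Jacobians differing by 1e-5 give an error of 1e-5."""
  j_t = np.eye(2)
  j_n = np.eye(2) + 1e-5
  return _compute_error((j_t, j_n))  # -> 1e-5 (up to float rounding)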


@tf_export(v1=["test.compute_gradient_error"])
@deprecation.deprecated(
    date=None,
    instructions="Use tf.test.compute_gradient in 2.0, which has better "
    "support for functions. Note that the two versions have different usage, "
    "so code change is needed.")
def compute_gradient_error(x,
                           x_shape,
                           y,
                           y_shape,
                           x_init_value=None,
                           delta=1e-3,
                           init_targets=None,
                           extra_feed_dict=None):
  """Computes the gradient error.

  Computes the maximum error for dy/dx between the computed Jacobian and the
  numerically estimated Jacobian.

  This function modifies the tensors passed in: it adds more operations to
  the graph and hence changes the consumers of the input tensors' operations.

  This function adds operations to the current graph. To compute the error
  using a particular device, such as a GPU, use the standard methods for
  setting a device (e.g. using with sess.graph.device() or setting a device
  function in the session constructor).

  Args:
    x: a tensor or list of tensors.
    x_shape: the dimensions of x as a tuple or an array of ints. If x is a
      list, then this is the list of shapes.
    y: a tensor.
    y_shape: the dimensions of y as a tuple or an array of ints.
    x_init_value: (optional) a numpy array of the same shape as "x"
      representing the initial value of x. If x is a list, this should be a
      list of numpy arrays. If this is None, the function will pick a random
      tensor as the initial value.
    delta: (optional) the amount of perturbation.
    init_targets: list of targets to run to initialize model params.
    extra_feed_dict: dict that allows fixing specified tensor values
      during the Jacobian calculation.

  Returns:
    The maximum error between the two Jacobians.
  """
  grad = compute_gradient(x, x_shape, y, y_shape, x_init_value, delta,
                          init_targets, extra_feed_dict=extra_feed_dict)
  return _compute_error(grad)
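

# --- Illustrative usage sketch (not part of the original module) -------------
# A hypothetical end-to-end gradient check, the typical way this module is
# used in TF1 op tests: assert that the worst Jacobian mismatch is below a
# tolerance. float64 keeps the central-difference error far below the bound.
def _demo_compute_gradient_error():
  from tensorflow.python.client import session  # local import for the sketch
  with ops.Graph().as_default() as g, session.Session(graph=g):
    x = constant_op.constant([0.5, 1.5, 2.5], dtype=dtypes.float64)
    y = math_ops.sigmoid(x)
    err = compute_gradient_error(x, [3], y, [3])
    assert err < 1e-6, "gradient mismatch: %g" % err
    return err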