Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/training/adadelta.py: 35% of 43 statements (coverage.py v7.4.0, created at 2024-01-03 07:57 +0000)
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Adadelta for TensorFlow."""
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.AdadeltaOptimizer"])
class AdadeltaOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adadelta algorithm.

  References:
    ADADELTA - An Adaptive Learning Rate Method:
      [Zeiler, 2012](http://arxiv.org/abs/1212.5701)
      ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
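
  Schematically, each step applies the paper's accumulator-based update. The
  sketch below is a plain-Python reference, not code used by the optimizer
  itself; the helper name `adadelta_step` is illustrative, while `accum` and
  `accum_update` correspond to the slots this optimizer creates.

  ```python
  def adadelta_step(var, grad, accum, accum_update,
                    learning_rate=0.001, rho=0.95, epsilon=1e-8):
    # Accumulate the squared gradient, form the scaled update, then
    # accumulate the squared update and apply it to the variable.
    accum = rho * accum + (1 - rho) * grad ** 2
    update = grad * ((accum_update + epsilon) ** 0.5) / ((accum + epsilon) ** 0.5)
    accum_update = rho * accum_update + (1 - rho) * update ** 2
    var = var - learning_rate * update
    return var, accum, accum_update
  ```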

  @compatibility(TF2)
  tf.compat.v1.train.AdadeltaOptimizer is compatible with eager mode and
  `tf.function`.
  When eager execution is enabled, `learning_rate`, `rho`, and `epsilon` can
  each be a callable that takes no arguments and returns the actual value to
  use. This can be useful for changing these values across different
  invocations of optimizer functions.
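
  For example, a zero-argument callable can stand in for a fixed value (a
  minimal sketch; `lr_fn` is just an illustrative name, not part of the API):

  ```python
  lr_fn = lambda: 0.001  # re-evaluated each time the optimizer reads it
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(
      learning_rate=lr_fn, rho=lambda: 0.95, epsilon=lambda: 1e-8)
  ```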

  To switch to native TF2 style, use [`tf.keras.optimizers.Adadelta`]
  (https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adadelta)
  instead. Please note that, due to implementation differences,
  `tf.keras.optimizers.Adadelta` and `tf.compat.v1.train.AdadeltaOptimizer`
  may show slight differences in floating point numerics, even though the
  formula used for the variable updates still matches.

  #### Structural mapping to native TF2

  Before:

  ```python
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(
      learning_rate=learning_rate,
      rho=rho,
      epsilon=epsilon)
  ```

  After:

  ```python
  optimizer = tf.keras.optimizers.Adadelta(
      learning_rate=learning_rate,
      rho=rho,
      epsilon=epsilon)
  ```

  #### How to map arguments

  | TF1 Arg Name       | TF2 Arg Name    | Note                            |
  | ------------------ | --------------- | ------------------------------- |
  | `learning_rate`    | `learning_rate` | Be careful when setting a       |
  : : : `learning_rate` tensor value computed from the global step. In TF1  :
  : : : this usually implied a dynamic learning rate and would be recomputed :
  : : : each step. In TF2 (eager + function) it is treated as a scalar value :
  : : : that is computed only once, instead of a symbolic placeholder to be  :
  : : : computed each time; see the sketch after this table.                 :
  | `rho`              | `rho`           | -                               |
  | `epsilon`          | `epsilon`       | Default value is 1e-08 in TF1,  |
  : : : but 1e-07 in TF2.                                                    :
  | `use_locking`      | -               | Not applicable in TF2.          |
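
  For a dynamic learning rate in native TF2, a `tf.keras.optimizers.schedules`
  schedule can be passed in place of the TF1 global-step-based tensor. A
  minimal sketch (the decay values here are purely illustrative):

  ```python
  lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
      initial_learning_rate=0.001, decay_steps=1000, decay_rate=0.9)
  optimizer = tf.keras.optimizers.Adadelta(learning_rate=lr_schedule)
  ```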

  #### Before & after usage example

  Before:

  ```python
  x = tf.Variable([1, 2, 3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  After:

  ```python
  x = tf.Variable([1, 2, 3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.keras.optimizers.Adadelta(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  @end_compatibility
  """

  def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8,
               use_locking=False, name="Adadelta"):
    """Construct a new Adadelta optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value. The learning rate.
        To match the exact form in the original paper use 1.0.
      rho: A `Tensor` or a floating point value. The decay rate.
      epsilon: A `Tensor` or a floating point value. A constant epsilon used
        to better condition the grad update.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "Adadelta".
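
    For example, to match the paper's exact formulation one might construct
    the optimizer as follows (a minimal sketch; the `rho` and `epsilon`
    values shown are simply the defaults):

    ```python
    optimizer = tf.compat.v1.train.AdadeltaOptimizer(
        learning_rate=1.0, rho=0.95, epsilon=1e-8)
    ```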
    """
    super(AdadeltaOptimizer, self).__init__(use_locking, name)
    self._lr = learning_rate
    self._rho = rho
    self._epsilon = epsilon

    # Tensor versions of the constructor arguments, created in _prepare().
    self._lr_t = None
    self._rho_t = None
    self._epsilon_t = None

  def _create_slots(self, var_list):
    # Per-variable state: accumulated squared gradients ("accum") and
    # accumulated squared updates ("accum_update").
    for v in var_list:
      self._zeros_slot(v, "accum", self._name)
      self._zeros_slot(v, "accum_update", self._name)

  def _prepare(self):
    lr = self._call_if_callable(self._lr)
    rho = self._call_if_callable(self._rho)
    epsilon = self._call_if_callable(self._epsilon)

    self._lr_t = ops.convert_to_tensor(lr, name="lr")
    self._rho_t = ops.convert_to_tensor(rho, name="rho")
    self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon")

  def _apply_dense(self, grad, var):
    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.apply_adadelta(
        var,
        accum,
        accum_update,
        math_ops.cast(self._lr_t, var.dtype.base_dtype),
        math_ops.cast(self._rho_t, var.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.resource_apply_adadelta(
        var.handle,
        accum.handle,
        accum_update.handle,
        math_ops.cast(self._lr_t, grad.dtype.base_dtype),
        math_ops.cast(self._rho_t, grad.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.sparse_apply_adadelta(
        var,
        accum,
        accum_update,
        math_ops.cast(self._lr_t, var.dtype.base_dtype),
        math_ops.cast(self._rho_t, var.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.resource_sparse_apply_adadelta(
        var.handle,
        accum.handle,
        accum_update.handle,
        math_ops.cast(self._lr_t, grad.dtype),
        math_ops.cast(self._rho_t, grad.dtype),
        math_ops.cast(self._epsilon_t, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)