Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/training/adadelta.py: 35%

43 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Adadelta for TensorFlow."""
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.AdadeltaOptimizer"])
class AdadeltaOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adadelta algorithm.

  References:
    ADADELTA - An Adaptive Learning Rate Method:
      [Zeiler, 2012](http://arxiv.org/abs/1212.5701)
      ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))

  @compatibility(TF2)
  tf.compat.v1.train.AdadeltaOptimizer is compatible with eager mode and
  `tf.function`.
  When eager execution is enabled, `learning_rate`, `rho`,
  and `epsilon` can each be a callable that
  takes no arguments and returns the actual value to use. This can be useful
  for changing these values across different invocations of optimizer
  functions.
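
  For example, each of these can be given as a zero-argument callable; the
  sketch below assumes eager execution and uses an illustrative `base_lr`
  variable:

  ```python
  base_lr = 0.001
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(
      learning_rate=lambda: base_lr,
      rho=lambda: 0.95,
      epsilon=lambda: 1e-8)
  ```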

  To switch to native TF2 style, use [`tf.keras.optimizers.Adadelta`]
  (https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adadelta)
  instead. Note that, due to implementation differences,
  `tf.keras.optimizers.Adadelta` and
  `tf.compat.v1.train.AdadeltaOptimizer` may have slight differences in
  floating point numerics even though the formula used for the variable
  updates still matches.

  #### Structural mapping to native TF2

  Before:

  ```python
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(
      learning_rate=learning_rate,
      rho=rho,
      epsilon=epsilon)
  ```

  After:

  ```python
  optimizer = tf.keras.optimizers.Adadelta(
      learning_rate=learning_rate,
      rho=rho,
      epsilon=epsilon)
  ```

  #### How to map arguments
  | TF1 Arg Name       | TF2 Arg Name    | Note                             |
  | ------------------ | --------------- | -------------------------------- |
  | `learning_rate`    | `learning_rate` | Be careful of setting            |
  : : : learning_rate tensor value computed from the global step.           :
  : : : In TF1 this was usually meant to imply a dynamic learning rate and  :
  : : : would recompute in each step. In TF2 (eager + function) it will     :
  : : : treat it as a scalar value that only gets computed once instead of  :
  : : : a symbolic placeholder to be computed each time.                    :
  | `rho`              | `rho`           | -                                |
  | `epsilon`          | `epsilon`       | Default value is 1e-08 in TF1,   |
  : : : but 1e-07 in TF2.                                                   :
  | `use_locking`      | -               | Not applicable in TF2.           |
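
  For example, when matching TF1 numerics matters, the TF1 default `epsilon`
  can be passed explicitly instead of relying on the TF2 default (a minimal
  sketch):

  ```python
  optimizer = tf.keras.optimizers.Adadelta(
      learning_rate=0.001, rho=0.95, epsilon=1e-8)
  ```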

  #### Before & after usage example
  Before:

  ```python
  x = tf.Variable([1,2,3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  After:

  ```python
  x = tf.Variable([1,2,3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.keras.optimizers.Adadelta(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  @end_compatibility
  """

  def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8,
               use_locking=False, name="Adadelta"):
    """Construct a new Adadelta optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value. The learning rate.
        To match the exact form in the original paper use 1.0.
      rho: A `Tensor` or a floating point value. The decay rate.
      epsilon: A `Tensor` or a floating point value. A constant epsilon used
        to better condition the grad update.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "Adadelta".
    """
    super(AdadeltaOptimizer, self).__init__(use_locking, name)
    self._lr = learning_rate
    self._rho = rho
    self._epsilon = epsilon

    # Tensor versions of the constructor arguments, created in _prepare().
    self._lr_t = None
    self._rho_t = None
    self._epsilon_t = None

  def _create_slots(self, var_list):
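    # Per-variable state: "accum" holds a decaying average of squared
    # gradients, "accum_update" a decaying average of squared updates.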

    for v in var_list:
      self._zeros_slot(v, "accum", self._name)
      self._zeros_slot(v, "accum_update", self._name)

  def _prepare(self):
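    # The hyperparameters may have been given as zero-argument callables
    # (see the class docstring); resolve them and convert the results to
    # tensors before they are used in the update ops.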

    lr = self._call_if_callable(self._lr)
    rho = self._call_if_callable(self._rho)
    epsilon = self._call_if_callable(self._epsilon)

    self._lr_t = ops.convert_to_tensor(lr, name="lr")
    self._rho_t = ops.convert_to_tensor(rho, name="rho")
    self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon")

  def _apply_dense(self, grad, var):
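    # Delegates to the fused adadelta kernel, which (following Zeiler, 2012)
    # roughly performs:
    #   accum        <- rho * accum + (1 - rho) * grad^2
    #   update       <- sqrt(accum_update + epsilon) / sqrt(accum + epsilon) * grad
    #   accum_update <- rho * accum_update + (1 - rho) * update^2
    #   var          <- var - lr * update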

    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.apply_adadelta(
        var,
        accum,
        accum_update,
        math_ops.cast(self._lr_t, var.dtype.base_dtype),
        math_ops.cast(self._rho_t, var.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
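    # Same update as _apply_dense, but for resource variables, which are
    # passed to the kernel by handle.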

    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.resource_apply_adadelta(
        var.handle,
        accum.handle,
        accum_update.handle,
        math_ops.cast(self._lr_t, grad.dtype.base_dtype),
        math_ops.cast(self._rho_t, grad.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
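    # grad is an IndexedSlices (e.g. from a gather or embedding lookup); only
    # the rows selected by grad.indices are updated.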

    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.sparse_apply_adadelta(
        var,
        accum,
        accum_update,
        math_ops.cast(self._lr_t, var.dtype.base_dtype),
        math_ops.cast(self._rho_t, var.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
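    # Resource-variable counterpart of _apply_sparse; here grad is a dense
    # tensor of values aligned with the separately passed indices.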

    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.resource_sparse_apply_adadelta(
        var.handle,
        accum.handle,
        accum_update.handle,
        math_ops.cast(self._lr_t, grad.dtype),
        math_ops.cast(self._rho_t, grad.dtype),
        math_ops.cast(self._epsilon_t, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)