# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
16"""Adagrad for TensorFlow."""
17from tensorflow.python.framework import ops
18from tensorflow.python.ops import array_ops
19from tensorflow.python.ops import gen_array_ops
20from tensorflow.python.ops import init_ops
21from tensorflow.python.ops import math_ops
22from tensorflow.python.training import optimizer
23from tensorflow.python.training import training_ops
24from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.AdagradOptimizer"])
class AdagradOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adagrad algorithm.

  References:
    Adaptive Subgradient Methods for Online Learning and Stochastic
    Optimization: [Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
    ([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))
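
  Element-wise, the per-variable update can be sketched as follows. This is a
  minimal NumPy illustration only, not the fused training ops this class
  actually dispatches to; the values mirror the usage example further below:

  ```python
  import numpy as np

  var = np.array([1.0, 2.0, 3.0], dtype=np.float32)
  accum = np.full_like(var, 0.1)  # initial_accumulator_value
  grad = np.array([0.1, 0.2, 0.3], dtype=np.float32)
  lr = 0.001

  accum += grad ** 2
  var -= lr * grad / np.sqrt(accum)
  ```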

  @compatibility(TF2)
  tf.compat.v1.train.AdagradOptimizer is compatible with eager mode and
  `tf.function`.
  When eager execution is enabled, `learning_rate`,
  `initial_accumulator_value`, and `epsilon` can each be a callable that
  takes no arguments and returns the actual value to use. This can be useful
  for changing these values across different invocations of optimizer
  functions.
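
  For example, a minimal sketch of a callable learning rate (assuming eager
  execution is enabled and `global_step` is incremented elsewhere in the
  training loop):

  ```python
  global_step = tf.Variable(0.0, trainable=False)
  # Re-evaluated each time the optimizer reads the learning rate.
  lr = lambda: 0.1 / (1.0 + global_step)
  optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=lr)
  ```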

  To switch to native TF2 style, use [`tf.keras.optimizers.Adagrad`]
  (https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adagrad)
  instead. Please note that, due to implementation differences,
  `tf.keras.optimizers.Adagrad` and
  `tf.compat.v1.train.AdagradOptimizer` may have slight differences in
  floating point numerics even though the formula used for the variable
  updates still matches.

  #### Structural mapping to native TF2

  Before:

  ```python
  optimizer = tf.compat.v1.train.AdagradOptimizer(
      learning_rate=learning_rate,
      initial_accumulator_value=initial_accumulator_value)
  ```

  After:

  ```python
  optimizer = tf.keras.optimizers.Adagrad(
      learning_rate=learning_rate,
      initial_accumulator_value=initial_accumulator_value,
      epsilon=1e-07)
  ```

  #### How to map arguments

  | TF1 Arg Name | TF2 Arg Name | Note |
  | ------------ | ------------ | ---- |
  | `learning_rate` | `learning_rate` | Be careful when setting a `learning_rate` tensor value computed from the global step. In TF1 this usually implied a dynamic learning rate that was recomputed on each step. In TF2 (eager + function) it is treated as a scalar value that is computed only once, not as a symbolic placeholder recomputed each time. |
  | `initial_accumulator_value` | `initial_accumulator_value` | The argument can be zero in TF2, which is not accepted in TF1. |
  | - | `epsilon` | `epsilon` is now configurable in TF2. The default value is changed from 1e-8 to 1e-7. |
  | `use_locking` | - | Not applicable in TF2. |
87 #### Before & after usage example
88 Before:
90 ```python
91 x = tf.Variable([1,2,3], dtype=tf.float32)
92 grad = tf.constant([0.1, 0.2, 0.3])
93 optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.001)
94 optimizer.apply_gradients(zip([grad], [x]))
95 ```
97 After:
99 ```python
100 x = tf.Variable([1,2,3], dtype=tf.float32)
101 grad = tf.constant([0.1, 0.2, 0.3])
102 optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.001)
103 optimizer.apply_gradients(zip([grad], [x]))
104 ```
106 @end_compatibility
107 """

  def __init__(self, learning_rate, initial_accumulator_value=0.1,
               use_locking=False, name="Adagrad"):
    """Construct a new Adagrad optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value. The learning rate.
      initial_accumulator_value: A floating point value.
        Starting value for the accumulators, must be positive.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "Adagrad".

    Raises:
      ValueError: If the `initial_accumulator_value` is invalid.
    """
    if initial_accumulator_value <= 0.0:
      raise ValueError("initial_accumulator_value must be positive: %s" %
                       initial_accumulator_value)
    super(AdagradOptimizer, self).__init__(use_locking, name)
    self._learning_rate = learning_rate
    self._initial_accumulator_value = initial_accumulator_value
    # Created in Initialize.
    self._learning_rate_tensor = None

  def _create_slots(self, var_list):
    for v in var_list:
      dtype = v.dtype.base_dtype
      # Statically shaped variables can use a constant initializer; otherwise
      # fall back to a callable that fills the runtime shape.
      if v.get_shape().is_fully_defined():
        init = init_ops.constant_initializer(self._initial_accumulator_value,
                                             dtype=dtype)
      else:
        init = self._init_constant_op(v, dtype)
      self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype,
                                              "accumulator", self._name)

  def _init_constant_op(self, v, dtype):
    def init():
      # Use a Tensor instead of initializer if variable does not have
      # static shape.
      init_constant = gen_array_ops.fill(array_ops.shape(v),
                                         self._initial_accumulator_value)
      return math_ops.cast(init_constant, dtype)
    return init

  def _prepare(self):
    learning_rate = self._call_if_callable(self._learning_rate)
    self._learning_rate_tensor = ops.convert_to_tensor(
        learning_rate, name="learning_rate")
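
  # The _apply_* and _resource_apply_* methods below delegate to the fused
  # Adagrad training ops (see the update sketch in the class docstring),
  # casting the prepared learning-rate tensor to the variable's or gradient's
  # dtype before each update.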
  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)
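
  # Sparse variants: only the variable and accumulator rows selected by
  # grad.indices are updated; rows for indices that do not appear in the
  # gradient are left untouched.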
  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.sparse_apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_sparse_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)