Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/training/adagrad.py: 42%

45 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Adagrad for TensorFlow."""
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.AdagradOptimizer"])
class AdagradOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adagrad algorithm.

  References:
    Adaptive Subgradient Methods for Online Learning and Stochastic
    Optimization: [Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
    ([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))

  @compatibility(TF2)
  `tf.compat.v1.train.AdagradOptimizer` is compatible with eager mode and
  `tf.function`.
  When eager execution is enabled, `learning_rate`,
  `initial_accumulator_value`, and `epsilon` can each be a callable that
  takes no arguments and returns the actual value to use. This can be useful
  for changing these values across different invocations of optimizer
  functions.

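  With eager execution enabled, a callable learning rate might look like the
  following sketch (illustrative only; `base_lr` and `lr_fn` are hypothetical
  names, not part of this API):

  ```python
  base_lr = 0.1

  def lr_fn():
    # Re-evaluated each time the optimizer applies gradients, so changing
    # `base_lr` between steps changes the learning rate that is used.
    return base_lr

  optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=lr_fn)
  ```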

  To switch to native TF2 style, use
  [`tf.keras.optimizers.Adagrad`](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adagrad)
  instead. Note that, due to implementation differences,
  `tf.keras.optimizers.Adagrad` and
  `tf.compat.v1.train.AdagradOptimizer` may have slight differences in
  floating point numerics, even though the formula used for the variable
  updates is the same.

  #### Structural mapping to native TF2

  Before:

  ```python
  optimizer = tf.compat.v1.train.AdagradOptimizer(
    learning_rate=learning_rate,
    initial_accumulator_value=initial_accumulator_value)
  ```

  After:

  ```python
  optimizer = tf.keras.optimizers.Adagrad(
    learning_rate=learning_rate,
    initial_accumulator_value=initial_accumulator_value,
    epsilon=1e-07)
  ```

  #### How to map arguments

  | TF1 Arg Name | TF2 Arg Name | Note |
  | :----------- | :----------- | :--- |
  | `learning_rate` | `learning_rate` | Be careful when setting a `learning_rate` tensor value computed from the global step. In TF1 this usually implied a dynamic learning rate and would be recomputed on each step. In TF2 (eager + `tf.function`) it is treated as a scalar value that is computed only once, rather than as a symbolic placeholder recomputed each time. |
  | `initial_accumulator_value` | `initial_accumulator_value` | The argument can be a value of zero in TF2, which is not accepted in TF1. |
  | - | `epsilon` | `epsilon` becomes configurable in TF2. The default value is changed from 1e-8 to 1e-7. |
  | `use_locking` | - | Not applicable in TF2. |

  #### Before & after usage example

  Before:

  ```python
  x = tf.Variable([1, 2, 3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  After:

  ```python
  x = tf.Variable([1, 2, 3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  @end_compatibility
  """

  def __init__(self, learning_rate, initial_accumulator_value=0.1,
               use_locking=False, name="Adagrad"):
    """Construct a new Adagrad optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value. The learning rate.
      initial_accumulator_value: A floating point value. Starting value for
        the accumulators; must be positive.
      use_locking: If `True`, use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "Adagrad".

    Raises:
      ValueError: If the `initial_accumulator_value` is invalid.
    """
    if initial_accumulator_value <= 0.0:
      raise ValueError("initial_accumulator_value must be positive: %s" %
                       initial_accumulator_value)
    super(AdagradOptimizer, self).__init__(use_locking, name)
    self._learning_rate = learning_rate
    self._initial_accumulator_value = initial_accumulator_value
    # Created in _prepare().
    self._learning_rate_tensor = None

  def _create_slots(self, var_list):
    for v in var_list:
      dtype = v.dtype.base_dtype
      if v.get_shape().is_fully_defined():
        init = init_ops.constant_initializer(self._initial_accumulator_value,
                                             dtype=dtype)
      else:
        init = self._init_constant_op(v, dtype)
      self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype,
                                              "accumulator", self._name)

  def _init_constant_op(self, v, dtype):
    def init():
      # Use a Tensor instead of initializer if variable does not have
      # static shape.
      init_constant = gen_array_ops.fill(array_ops.shape(v),
                                         self._initial_accumulator_value)
      return math_ops.cast(init_constant, dtype)
    return init

  def _prepare(self):
    learning_rate = self._call_if_callable(self._learning_rate)
    self._learning_rate_tensor = ops.convert_to_tensor(
        learning_rate, name="learning_rate")

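  # Note: the dense apply methods below delegate to the fused Adagrad kernels
  # in training_ops. Conceptually the Adagrad update is
  #   accum += grad * grad
  #   var -= lr * grad / sqrt(accum)
  # applied elementwise; the exact numerics (including any epsilon handling)
  # are defined by the kernels themselves.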

  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

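  # The sparse variants apply the same update only to the rows selected by the
  # gradient's indices: `_apply_sparse` receives an `IndexedSlices` gradient,
  # while `_resource_apply_sparse` receives dense values plus explicit indices.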

  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.sparse_apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_sparse_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)