# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
16"""Adagrad for TensorFlow."""
17from tensorflow.python.framework import ops
18from tensorflow.python.ops import array_ops
19from tensorflow.python.ops import gen_array_ops
20from tensorflow.python.ops import init_ops
21from tensorflow.python.ops import math_ops
22from tensorflow.python.training import optimizer
23from tensorflow.python.training import training_ops
24from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.AdagradOptimizer"])
class AdagradOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adagrad algorithm.

  References:
    Adaptive Subgradient Methods for Online Learning and Stochastic
    Optimization: [Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
    ([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))
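
  Element-wise, the per-variable update can be sketched as follows. This is a
  minimal NumPy illustration only, not the fused training ops this class
  actually dispatches to; the values mirror the usage example further below:

  ```python
  import numpy as np

  var = np.array([1.0, 2.0, 3.0], dtype=np.float32)
  accum = np.full_like(var, 0.1)  # initial_accumulator_value
  grad = np.array([0.1, 0.2, 0.3], dtype=np.float32)
  lr = 0.001

  accum += grad ** 2
  var -= lr * grad / np.sqrt(accum)
  ```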

  @compatibility(TF2)
  tf.compat.v1.train.AdagradOptimizer is compatible with eager mode and
  `tf.function`.
  When eager execution is enabled, `learning_rate`,
  `initial_accumulator_value`, and `epsilon` can each be a callable that
  takes no arguments and returns the actual value to use. This can be useful
  for changing these values across different invocations of optimizer
  functions.
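
  For example, a minimal sketch of a callable learning rate (assuming eager
  execution is enabled and `global_step` is incremented elsewhere in the
  training loop):

  ```python
  global_step = tf.Variable(0.0, trainable=False)
  # Re-evaluated each time the optimizer reads the learning rate.
  lr = lambda: 0.1 / (1.0 + global_step)
  optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=lr)
  ```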

  To switch to native TF2 style, use [`tf.keras.optimizers.Adagrad`]
  (https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adagrad)
  instead. Please note that, due to implementation differences,
  `tf.keras.optimizers.Adagrad` and
  `tf.compat.v1.train.AdagradOptimizer` may have slight differences in
  floating point numerics even though the formula used for the variable
  updates still matches.

  #### Structural mapping to native TF2

  Before:

  ```python
  optimizer = tf.compat.v1.train.AdagradOptimizer(
      learning_rate=learning_rate,
      initial_accumulator_value=initial_accumulator_value)
  ```

  After:

  ```python
  optimizer = tf.keras.optimizers.Adagrad(
      learning_rate=learning_rate,
      initial_accumulator_value=initial_accumulator_value,
      epsilon=1e-07)
  ```

  #### How to map arguments

  | TF1 Arg Name | TF2 Arg Name | Note |
  | ------------ | ------------ | ---- |
  | `learning_rate` | `learning_rate` | Be careful when setting a `learning_rate` tensor value computed from the global step. In TF1 this usually implied a dynamic learning rate that was recomputed on each step. In TF2 (eager + function) it is treated as a scalar value that is computed only once, not as a symbolic placeholder recomputed each time. |
  | `initial_accumulator_value` | `initial_accumulator_value` | The argument can be zero in TF2, which is not accepted in TF1. |
  | - | `epsilon` | `epsilon` is now configurable in TF2. The default value is changed from 1e-8 to 1e-7. |
  | `use_locking` | - | Not applicable in TF2. |
87 #### Before & after usage example
88 Before:
90 ```python
91 x = tf.Variable([1,2,3], dtype=tf.float32)
92 grad = tf.constant([0.1, 0.2, 0.3])
93 optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.001)
94 optimizer.apply_gradients(zip([grad], [x]))
95 ```
97 After:
99 ```python
100 x = tf.Variable([1,2,3], dtype=tf.float32)
101 grad = tf.constant([0.1, 0.2, 0.3])
102 optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.001)
103 optimizer.apply_gradients(zip([grad], [x]))
104 ```
106 @end_compatibility
107 """

  def __init__(self, learning_rate, initial_accumulator_value=0.1,
               use_locking=False, name="Adagrad"):
    """Construct a new Adagrad optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value. The learning rate.
      initial_accumulator_value: A floating point value.
        Starting value for the accumulators, must be positive.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "Adagrad".

    Raises:
      ValueError: If the `initial_accumulator_value` is invalid.
    """
    if initial_accumulator_value <= 0.0:
      raise ValueError("initial_accumulator_value must be positive: %s" %
                       initial_accumulator_value)
    super(AdagradOptimizer, self).__init__(use_locking, name)
    self._learning_rate = learning_rate
    self._initial_accumulator_value = initial_accumulator_value
    # Created in Initialize.
    self._learning_rate_tensor = None

  def _create_slots(self, var_list):
    for v in var_list:
      dtype = v.dtype.base_dtype
      # Statically shaped variables can use a constant initializer; otherwise
      # fall back to a callable that fills the runtime shape.
      if v.get_shape().is_fully_defined():
        init = init_ops.constant_initializer(self._initial_accumulator_value,
                                             dtype=dtype)
      else:
        init = self._init_constant_op(v, dtype)
      self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype,
                                              "accumulator", self._name)

  def _init_constant_op(self, v, dtype):
    def init():
      # Use a Tensor instead of initializer if variable does not have
      # static shape.
      init_constant = gen_array_ops.fill(array_ops.shape(v),
                                         self._initial_accumulator_value)
      return math_ops.cast(init_constant, dtype)
    return init

  def _prepare(self):
    learning_rate = self._call_if_callable(self._learning_rate)
    self._learning_rate_tensor = ops.convert_to_tensor(
        learning_rate, name="learning_rate")
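
  # The _apply_* and _resource_apply_* methods below delegate to the fused
  # Adagrad training ops (see the update sketch in the class docstring),
  # casting the prepared learning-rate tensor to the variable's or gradient's
  # dtype before each update.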
  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)
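
  # Sparse variants: only the variable and accumulator rows selected by
  # grad.indices are updated; rows for indices that do not appear in the
  # gradient are left untouched.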
  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.sparse_apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_sparse_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)