Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/training/adadelta.py: 35% of 43 statements (coverage.py v7.4.0, created at 2024-01-03 07:57 +0000)
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Adadelta for TensorFlow."""
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.AdadeltaOptimizer"])
class AdadeltaOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adadelta algorithm.

  References:
    ADADELTA - An Adaptive Learning Rate Method:
      [Zeiler, 2012](http://arxiv.org/abs/1212.5701)
      ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
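
  Schematically, each step applies the paper's accumulator-based update. The
  sketch below is a plain-Python reference, not code used by the optimizer
  itself; the helper name `adadelta_step` is illustrative, while `accum` and
  `accum_update` correspond to the slots this optimizer creates.

  ```python
  def adadelta_step(var, grad, accum, accum_update,
                    learning_rate=0.001, rho=0.95, epsilon=1e-8):
    # Accumulate the squared gradient, form the scaled update, then
    # accumulate the squared update and apply it to the variable.
    accum = rho * accum + (1 - rho) * grad ** 2
    update = grad * ((accum_update + epsilon) ** 0.5) / ((accum + epsilon) ** 0.5)
    accum_update = rho * accum_update + (1 - rho) * update ** 2
    var = var - learning_rate * update
    return var, accum, accum_update
  ```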

  @compatibility(TF2)
  tf.compat.v1.train.AdadeltaOptimizer is compatible with eager mode and
  `tf.function`.
  When eager execution is enabled, `learning_rate`, `rho`, and `epsilon` can
  each be a callable that takes no arguments and returns the actual value to
  use. This can be useful for changing these values across different
  invocations of optimizer functions.
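
  For example, a zero-argument callable can stand in for a fixed value (a
  minimal sketch; `lr_fn` is just an illustrative name, not part of the API):

  ```python
  lr_fn = lambda: 0.001  # re-evaluated each time the optimizer reads it
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(
      learning_rate=lr_fn, rho=lambda: 0.95, epsilon=lambda: 1e-8)
  ```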

  To switch to native TF2 style, use [`tf.keras.optimizers.Adadelta`]
  (https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adadelta)
  instead. Please note that, due to implementation differences,
  `tf.keras.optimizers.Adadelta` and `tf.compat.v1.train.AdadeltaOptimizer`
  may show slight differences in floating point numerics, even though the
  formula used for the variable updates still matches.

  #### Structural mapping to native TF2

  Before:

  ```python
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(
      learning_rate=learning_rate,
      rho=rho,
      epsilon=epsilon)
  ```

  After:

  ```python
  optimizer = tf.keras.optimizers.Adadelta(
      learning_rate=learning_rate,
      rho=rho,
      epsilon=epsilon)
  ```

  #### How to map arguments

  | TF1 Arg Name       | TF2 Arg Name    | Note                            |
  | ------------------ | --------------- | ------------------------------- |
  | `learning_rate`    | `learning_rate` | Be careful when setting a       |
  : : : `learning_rate` tensor value computed from the global step. In TF1  :
  : : : this usually implied a dynamic learning rate and would be recomputed :
  : : : each step. In TF2 (eager + function) it is treated as a scalar value :
  : : : that is computed only once, instead of a symbolic placeholder to be  :
  : : : computed each time; see the sketch after this table.                 :
  | `rho`              | `rho`           | -                               |
  | `epsilon`          | `epsilon`       | Default value is 1e-08 in TF1,  |
  : : : but 1e-07 in TF2.                                                    :
  | `use_locking`      | -               | Not applicable in TF2.          |
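
  For a dynamic learning rate in native TF2, a `tf.keras.optimizers.schedules`
  schedule can be passed in place of the TF1 global-step-based tensor. A
  minimal sketch (the decay values here are purely illustrative):

  ```python
  lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
      initial_learning_rate=0.001, decay_steps=1000, decay_rate=0.9)
  optimizer = tf.keras.optimizers.Adadelta(learning_rate=lr_schedule)
  ```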

  #### Before & after usage example

  Before:

  ```python
  x = tf.Variable([1, 2, 3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.compat.v1.train.AdadeltaOptimizer(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  After:

  ```python
  x = tf.Variable([1, 2, 3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.keras.optimizers.Adadelta(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  @end_compatibility
  """

  def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8,
               use_locking=False, name="Adadelta"):
    """Construct a new Adadelta optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value. The learning rate.
        To match the exact form in the original paper use 1.0.
      rho: A `Tensor` or a floating point value. The decay rate.
      epsilon: A `Tensor` or a floating point value. A constant epsilon used
        to better condition the grad update.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "Adadelta".
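
    For example, to match the paper's exact formulation one might construct
    the optimizer as follows (a minimal sketch; the `rho` and `epsilon`
    values shown are simply the defaults):

    ```python
    optimizer = tf.compat.v1.train.AdadeltaOptimizer(
        learning_rate=1.0, rho=0.95, epsilon=1e-8)
    ```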
    """
    super(AdadeltaOptimizer, self).__init__(use_locking, name)
    self._lr = learning_rate
    self._rho = rho
    self._epsilon = epsilon

    # Tensor versions of the constructor arguments, created in _prepare().
    self._lr_t = None
    self._rho_t = None
    self._epsilon_t = None

  def _create_slots(self, var_list):
    # Per-variable state: accumulated squared gradients ("accum") and
    # accumulated squared updates ("accum_update").
    for v in var_list:
      self._zeros_slot(v, "accum", self._name)
      self._zeros_slot(v, "accum_update", self._name)

  def _prepare(self):
    lr = self._call_if_callable(self._lr)
    rho = self._call_if_callable(self._rho)
    epsilon = self._call_if_callable(self._epsilon)

    self._lr_t = ops.convert_to_tensor(lr, name="lr")
    self._rho_t = ops.convert_to_tensor(rho, name="rho")
    self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon")

  def _apply_dense(self, grad, var):
    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.apply_adadelta(
        var,
        accum,
        accum_update,
        math_ops.cast(self._lr_t, var.dtype.base_dtype),
        math_ops.cast(self._rho_t, var.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.resource_apply_adadelta(
        var.handle,
        accum.handle,
        accum_update.handle,
        math_ops.cast(self._lr_t, grad.dtype.base_dtype),
        math_ops.cast(self._rho_t, grad.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.sparse_apply_adadelta(
        var,
        accum,
        accum_update,
        math_ops.cast(self._lr_t, var.dtype.base_dtype),
        math_ops.cast(self._rho_t, var.dtype.base_dtype),
        math_ops.cast(self._epsilon_t, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    accum = self.get_slot(var, "accum")
    accum_update = self.get_slot(var, "accum_update")
    return training_ops.resource_sparse_apply_adadelta(
        var.handle,
        accum.handle,
        accum_update.handle,
        math_ops.cast(self._lr_t, grad.dtype),
        math_ops.cast(self._rho_t, grad.dtype),
        math_ops.cast(self._epsilon_t, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)