Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/optimizers/adagrad.py: 36%

39 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Adagrad optimizer implementation."""

import tensorflow.compat.v2 as tf

from keras.src import initializers
from keras.src.optimizers import optimizer
from keras.src.saving.object_registration import register_keras_serializable

# isort: off
from tensorflow.python.util.tf_export import keras_export


@register_keras_serializable()
@keras_export(
    "keras.optimizers.experimental.Adagrad",
    "keras.optimizers.Adagrad",
    "keras.dtensor.experimental.optimizers.Adagrad",
    v1=[],
)
class Adagrad(optimizer.Optimizer):
    r"""Optimizer that implements the Adagrad algorithm.

    Adagrad is an optimizer with parameter-specific learning rates,
    which are adapted relative to how frequently a parameter gets
    updated during training. The more updates a parameter receives,
    the smaller the updates.

    Args:
        learning_rate: Initial value for the learning rate:
            either a floating point value,
            or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance.
            Defaults to 0.001.
            Note that `Adagrad` tends to benefit from higher initial learning
            rate values compared to other optimizers.
            To match the exact form in the original paper, use 1.0.
        initial_accumulator_value: Floating point value.
            Starting value for the accumulators (per-parameter momentum
            values). Must be non-negative.
        epsilon: Small floating point value used to maintain numerical
            stability.
        {{base_optimizer_keyword_args}}

    Reference:
        - [Duchi et al., 2011](
            http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf).
    """

    def __init__(
        self,
        learning_rate=0.001,
        initial_accumulator_value=0.1,
        epsilon=1e-7,
        weight_decay=None,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        use_ema=False,
        ema_momentum=0.99,
        ema_overwrite_frequency=None,
        jit_compile=True,
        name="Adagrad",
        **kwargs
    ):
        super().__init__(
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            use_ema=use_ema,
            ema_momentum=ema_momentum,
            ema_overwrite_frequency=ema_overwrite_frequency,
            jit_compile=jit_compile,
            name=name,
            **kwargs
        )
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.initial_accumulator_value = initial_accumulator_value
        self.epsilon = epsilon

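    # `build()` is called once with the full variable list: it creates one
    # "accumulator" slot variable per model variable, initialized to
    # `initial_accumulator_value`, which holds the running sum of squared
    # gradients consumed by `update_step`.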

    def build(self, var_list):
        super().build(var_list)
        if hasattr(self, "_built") and self._built:
            return
        self._built = True
        self._accumulators = []
        initializer = initializers.Constant(self.initial_accumulator_value)
        for var in var_list:
            self._accumulators.append(
                self.add_variable_from_reference(
                    var,
                    "accumulator",
                    initial_value=initializer(shape=var.shape, dtype=var.dtype),
                )
            )

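    # `update_step` applies the standard Adagrad rule:
    #     accumulator <- accumulator + grad ** 2
    #     variable    <- variable - lr * grad / sqrt(accumulator + epsilon)
    # For `tf.IndexedSlices` (sparse) gradients, only the rows selected by
    # `grad.indices` are read and updated.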

    def update_step(self, grad, variable):
        """Update step given gradient and the associated model variable."""
        lr = tf.cast(self.learning_rate, variable.dtype)

        var_key = self._var_key(variable)
        accumulator = self._accumulators[self._index_dict[var_key]]

        if isinstance(grad, tf.IndexedSlices):
            # Sparse gradients.
            accumulator.scatter_add(
                tf.IndexedSlices(grad.values * grad.values, grad.indices)
            )
            sparse_accumulator = tf.gather(accumulator, indices=grad.indices)
            sparse_denominator = tf.sqrt(sparse_accumulator + self.epsilon)
            variable.scatter_add(
                tf.IndexedSlices(
                    -lr * grad.values / sparse_denominator, grad.indices
                )
            )
        else:
            # Dense gradients.
            accumulator.assign_add(grad * grad)
            variable.assign_sub(lr * grad / tf.sqrt(accumulator + self.epsilon))

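    # `get_config` merges the Adagrad-specific hyperparameters into the base
    # optimizer config so the instance can be serialized and later rebuilt,
    # e.g. when saving a compiled model.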

    def get_config(self):
        config = super().get_config()

        config.update(
            {
                "learning_rate": self._serialize_hyperparameter(
                    self._learning_rate
                ),
                "initial_accumulator_value": self.initial_accumulator_value,
                "epsilon": self.epsilon,
            }
        )
        return config


Adagrad.__doc__ = Adagrad.__doc__.replace(
    "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args
)

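For context, a minimal usage sketch (not part of adagrad.py): it assumes a TensorFlow 2.x install that ships this Keras code, and exercises the constructor and the per-variable update path shown above.

import tensorflow as tf

# Build a tiny regression model and attach an Adagrad optimizer; the keyword
# arguments mirror the __init__ signature above (defaults written out).
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
opt = tf.keras.optimizers.Adagrad(
    learning_rate=0.001,
    initial_accumulator_value=0.1,
    epsilon=1e-7,
)
model.compile(optimizer=opt, loss="mse")

# A single manual step: `apply_gradients` routes each (gradient, variable)
# pair through the `update_step` method shown in this file.
x = tf.random.normal((8, 4))
y = tf.random.normal((8, 1))
with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - y))
grads = tape.gradient(loss, model.trainable_variables)
opt.apply_gradients(zip(grads, model.trainable_variables))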