Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/optimizers/schedules/learning_rate_schedule.py: 23%

264 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Various learning rate schedule functions.""" 

16 

17import abc 

18import math 

19 

20import tensorflow.compat.v2 as tf 

21 

22from keras.src import backend 

23from keras.src.saving import serialization_lib 

24from keras.src.saving.legacy import serialization as legacy_serialization 

25 

26# isort: off 

27from tensorflow.python.util.tf_export import keras_export 

28 

29 

30@keras_export("keras.optimizers.schedules.LearningRateSchedule") 

31class LearningRateSchedule: 

32 """The learning rate schedule base class. 

33 

34 You can use a learning rate schedule to modulate how the learning rate 

35 of your optimizer changes over time. 

36 

37 Several built-in learning rate schedules are available, such as 

38 `tf.keras.optimizers.schedules.ExponentialDecay` or 

39 `tf.keras.optimizers.schedules.PiecewiseConstantDecay`: 

40 

41 ```python 

42 lr_schedule = keras.optimizers.schedules.ExponentialDecay( 

43 initial_learning_rate=1e-2, 

44 decay_steps=10000, 

45 decay_rate=0.9) 

46 optimizer = keras.optimizers.SGD(learning_rate=lr_schedule) 

47 ``` 

48 

49 A `LearningRateSchedule` instance can be passed in as the `learning_rate` 

50 argument of any optimizer. 

51 

52 To implement your own schedule object, you should implement the `__call__` 

53 method, which takes a `step` argument (scalar integer tensor, the 

54 current training step count). 

55 As with any other Keras object, you can also optionally

56 make your object serializable by implementing the `get_config` 

57 and `from_config` methods. 

58 

59 Example: 

60 

61 ```python 

62 class MyLRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule): 

63 

64 def __init__(self, initial_learning_rate): 

65 self.initial_learning_rate = initial_learning_rate 

66 

67 def __call__(self, step): 

68 return self.initial_learning_rate / (step + 1) 

69 

70 optimizer = tf.keras.optimizers.SGD(learning_rate=MyLRSchedule(0.1)) 

71 ``` 

72 """ 

73 

74 @abc.abstractmethod 

75 def __call__(self, step): 

76 raise NotImplementedError( 

77 f"Learning rate schedule '{self.__class__.__name__}' " 

78 "must override `__call__(self, step)`." 

79 ) 

80 

81 @abc.abstractmethod 

82 def get_config(self): 

83 raise NotImplementedError( 

84 f"Learning rate schedule '{self.__class__.__name__}' " 

85 "must override `get_config()` in order to be serializable." 

86 ) 

87 

88 @classmethod 

89 def from_config(cls, config): 

90 """Instantiates a `LearningRateSchedule` from its config. 

91 

92 Args: 

93 config: Output of `get_config()`. 

94 

95 Returns: 

96 A `LearningRateSchedule` instance. 

97 """ 

98 return cls(**config) 

99 

100 
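The base-class docstring above shows a `__call__`-only schedule; here is a minimal sketch, based on that docstring example, of the same 1/t schedule made serializable by also overriding `get_config` and relying on the default `from_config` (which just calls `cls(**config)`). Values and names outside the docstring are illustrative only.

```python
import tensorflow as tf

class MyLRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):

    def __init__(self, initial_learning_rate):
        self.initial_learning_rate = initial_learning_rate

    def __call__(self, step):
        # Same 1/t decay as the docstring example; cast so the division
        # also works when `step` is an integer tensor.
        return self.initial_learning_rate / (tf.cast(step, tf.float32) + 1.0)

    def get_config(self):
        return {"initial_learning_rate": self.initial_learning_rate}

optimizer = tf.keras.optimizers.SGD(learning_rate=MyLRSchedule(0.1))
```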

101@keras_export("keras.optimizers.schedules.ExponentialDecay") 

102class ExponentialDecay(LearningRateSchedule): 

103 """A LearningRateSchedule that uses an exponential decay schedule. 

104 

105 When training a model, it is often useful to lower the learning rate as 

106 the training progresses. This schedule applies an exponential decay function 

107 to an optimizer step, given a provided initial learning rate. 

108 

109 The schedule is a 1-arg callable that produces a decayed learning 

110 rate when passed the current optimizer step. This can be useful for changing 

111 the learning rate value across different invocations of optimizer functions. 

112 It is computed as: 

113 

114 ```python 

115 def decayed_learning_rate(step): 

116 return initial_learning_rate * decay_rate ^ (step / decay_steps) 

117 ``` 

118 

119 If the argument `staircase` is `True`, then `step / decay_steps` is 

120 an integer division and the decayed learning rate follows a 

121 staircase function. 

122 

123 You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` 

124 as the learning rate. 

125 Example: When fitting a Keras model, decay every 100000 steps with a base 

126 of 0.96: 

127 

128 ```python 

129 initial_learning_rate = 0.1 

130 lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( 

131 initial_learning_rate, 

132 decay_steps=100000, 

133 decay_rate=0.96, 

134 staircase=True) 

135 

136 model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule), 

137 loss='sparse_categorical_crossentropy', 

138 metrics=['accuracy']) 

139 

140 model.fit(data, labels, epochs=5) 

141 ``` 

142 

143 The learning rate schedule is also serializable and deserializable using 

144 `tf.keras.optimizers.schedules.serialize` and 

145 `tf.keras.optimizers.schedules.deserialize`. 

146 

147 Returns: 

148 A 1-arg callable learning rate schedule that takes the current optimizer 

149 step and outputs the decayed learning rate, a scalar `Tensor` of the same 

150 type as `initial_learning_rate`. 

151 """ 

152 

153 def __init__( 

154 self, 

155 initial_learning_rate, 

156 decay_steps, 

157 decay_rate, 

158 staircase=False, 

159 name=None, 

160 ): 

161 """Applies exponential decay to the learning rate. 

162 

163 Args: 

164 initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a 

165 Python number. The initial learning rate. 

166 decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. 

167 Must be positive. See the decay computation above. 

168 decay_rate: A scalar `float32` or `float64` `Tensor` or a 

169 Python number. The decay rate. 

170 staircase: Boolean. If `True`, decay the learning rate at discrete

171 intervals.

172 name: String. Optional name of the operation. Defaults to 

173 'ExponentialDecay'. 

174 """ 

175 super().__init__() 

176 self.initial_learning_rate = initial_learning_rate 

177 self.decay_steps = decay_steps 

178 self.decay_rate = decay_rate 

179 self.staircase = staircase 

180 self.name = name 

181 

182 def __call__(self, step): 

183 with tf.name_scope(self.name or "ExponentialDecay") as name: 

184 initial_learning_rate = tf.convert_to_tensor( 

185 self.initial_learning_rate, name="initial_learning_rate" 

186 ) 

187 dtype = initial_learning_rate.dtype 

188 decay_steps = tf.cast(self.decay_steps, dtype) 

189 decay_rate = tf.cast(self.decay_rate, dtype) 

190 

191 global_step_recomp = tf.cast(step, dtype) 

192 p = global_step_recomp / decay_steps 

193 if self.staircase: 

194 p = tf.floor(p) 

195 return tf.multiply( 

196 initial_learning_rate, tf.pow(decay_rate, p), name=name 

197 ) 

198 

199 def get_config(self): 

200 return { 

201 "initial_learning_rate": self.initial_learning_rate, 

202 "decay_steps": self.decay_steps, 

203 "decay_rate": self.decay_rate, 

204 "staircase": self.staircase, 

205 "name": self.name, 

206 } 

207 

208 
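To illustrate the decay formula above, a small sketch (eager evaluation assumed; printed values are approximate):

```python
import tensorflow as tf

lr = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=10, decay_rate=0.5)
# 0.1 * 0.5 ** (step / 10)
print(float(lr(0)))    # 0.1
print(float(lr(25)))   # ~0.0177  (0.1 * 0.5 ** 2.5)

staircase_lr = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=10, decay_rate=0.5, staircase=True)
print(float(staircase_lr(25)))  # 0.025  (0.1 * 0.5 ** floor(2.5))
```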

209@keras_export("keras.optimizers.schedules.PiecewiseConstantDecay") 

210class PiecewiseConstantDecay(LearningRateSchedule): 

211 """A LearningRateSchedule that uses a piecewise constant decay schedule. 

212 

213 The function returns a 1-arg callable to compute the piecewise constant 

214 when passed the current optimizer step. This can be useful for changing the 

215 learning rate value across different invocations of optimizer functions. 

216 

217 Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5 

218 for the next 10000 steps, and 0.1 for any additional steps. 

219 

220 ```python 

221 step = tf.Variable(0, trainable=False) 

222 boundaries = [100000, 110000] 

223 values = [1.0, 0.5, 0.1] 

224 learning_rate_fn = keras.optimizers.schedules.PiecewiseConstantDecay( 

225 boundaries, values) 

226 

227 # Later, whenever we perform an optimization step, we pass in the step. 

228 learning_rate = learning_rate_fn(step) 

229 ``` 

230 

231 You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` 

232 as the learning rate. The learning rate schedule is also serializable and 

233 deserializable using `tf.keras.optimizers.schedules.serialize` and 

234 `tf.keras.optimizers.schedules.deserialize`. 

235 

236 Returns: 

237 A 1-arg callable learning rate schedule that takes the current optimizer 

238 step and outputs the decayed learning rate, a scalar `Tensor` of the same 

239 type as the boundary tensors. 

240 

241 The output of the 1-arg function that takes the `step` 

242 is `values[0]` when `step <= boundaries[0]`, 

243 `values[1]` when `step > boundaries[0]` and `step <= boundaries[1]`, ..., 

244 and `values[-1]` when `step > boundaries[-1]`.

245 """ 

246 

247 def __init__(self, boundaries, values, name=None): 

248 """Piecewise constant from boundaries and interval values. 

249 

250 Args: 

251 boundaries: A list of `Tensor`s or `int`s or `float`s with strictly 

252 increasing entries, and with all elements having the same type as 

253 the optimizer step. 

254 values: A list of `Tensor`s or `float`s or `int`s that specifies the 

255 values for the intervals defined by `boundaries`. It should have one 

256 more element than `boundaries`, and all elements should have the 

257 same type. 

258 name: A string. Optional name of the operation. Defaults to 

259 'PiecewiseConstant'. 

260 

261 Raises: 

262 ValueError: if the number of elements in the lists does not match.

263 """ 

264 super().__init__() 

265 

266 if len(boundaries) != len(values) - 1: 

267 raise ValueError( 

268 "The length of boundaries should be 1 less than the length of " 

269 f"values. Received: boundaries={boundaries} of length " 

270 f"{len(boundaries)}, and values={values} " 

271 f"of length {len(values)}." 

272 ) 

273 

274 self.boundaries = boundaries 

275 self.values = values 

276 self.name = name 

277 

278 def __call__(self, step): 

279 with tf.name_scope(self.name or "PiecewiseConstant"): 

280 boundaries = tf.nest.map_structure( 

281 tf.convert_to_tensor, tf.nest.flatten(self.boundaries) 

282 ) 

283 values = tf.nest.map_structure( 

284 tf.convert_to_tensor, tf.nest.flatten(self.values) 

285 ) 

286 x_recomp = tf.convert_to_tensor(step) 

287 for i, b in enumerate(boundaries): 

288 if b.dtype.base_dtype != x_recomp.dtype.base_dtype: 

289 # We cast the boundaries to have the same type as the step 

290 b = tf.cast(b, x_recomp.dtype.base_dtype) 

291 boundaries[i] = b 

292 pred_fn_pairs = [] 

293 pred_fn_pairs.append((x_recomp <= boundaries[0], lambda: values[0])) 

294 pred_fn_pairs.append( 

295 (x_recomp > boundaries[-1], lambda: values[-1]) 

296 ) 

297 for low, high, v in zip( 

298 boundaries[:-1], boundaries[1:], values[1:-1] 

299 ): 

300 # Need to bind v here; can do this with lambda v=v: ... 

301 pred = (x_recomp > low) & (x_recomp <= high) 

302 pred_fn_pairs.append((pred, lambda v=v: v)) 

303 

304 # The default isn't needed here because our conditions are mutually 

305 # exclusive and exhaustive, but tf.case requires it. 

306 default = lambda: values[0] 

307 return tf.case(pred_fn_pairs, default, exclusive=True) 

308 

309 def get_config(self): 

310 return { 

311 "boundaries": self.boundaries, 

312 "values": self.values, 

313 "name": self.name, 

314 } 

315 

316 
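Continuing the docstring example, a quick sketch of how the boundaries map to values (eager evaluation assumed):

```python
import tensorflow as tf

lr = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[100000, 110000], values=[1.0, 0.5, 0.1])
print(float(lr(100000)))  # 1.0  (step <= boundaries[0])
print(float(lr(100001)))  # 0.5  (boundaries[0] < step <= boundaries[1])
print(float(lr(120000)))  # 0.1  (step > boundaries[-1])
```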

317@keras_export("keras.optimizers.schedules.PolynomialDecay") 

318class PolynomialDecay(LearningRateSchedule): 

319 """A LearningRateSchedule that uses a polynomial decay schedule. 

320 

321 It is commonly observed that a monotonically decreasing learning rate, whose 

322 degree of change is carefully chosen, results in a better performing model. 

323 This schedule applies a polynomial decay function to an optimizer step, 

324 given a provided `initial_learning_rate`, to reach an `end_learning_rate` 

325 in the given `decay_steps`. 

326 

327 It requires a `step` value to compute the decayed learning rate. You 

328 can just pass a TensorFlow variable that you increment at each training 

329 step. 

330 

331 The schedule is a 1-arg callable that produces a decayed learning rate 

332 when passed the current optimizer step. This can be useful for changing the 

333 learning rate value across different invocations of optimizer functions. 

334 It is computed as: 

335 

336 ```python 

337 def decayed_learning_rate(step): 

338 step = min(step, decay_steps) 

339 return ((initial_learning_rate - end_learning_rate) * 

340 (1 - step / decay_steps) ^ (power) 

341 ) + end_learning_rate 

342 ``` 

343 

344 If `cycle` is True then a multiple of `decay_steps` is used, the first one 

345 that is bigger than `step`. 

346 

347 ```python 

348 def decayed_learning_rate(step): 

349 decay_steps = decay_steps * ceil(step / decay_steps) 

350 return ((initial_learning_rate - end_learning_rate) * 

351 (1 - step / decay_steps) ^ (power) 

352 ) + end_learning_rate 

353 ``` 

354 

355 You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` 

356 as the learning rate. 

357 Example: Fit a model while decaying from 0.1 to 0.01 in 10000 steps using 

358 sqrt (i.e. power=0.5): 

359 

360 ```python 

361 ... 

362 starter_learning_rate = 0.1 

363 end_learning_rate = 0.01 

364 decay_steps = 10000 

365 learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay( 

366 starter_learning_rate, 

367 decay_steps, 

368 end_learning_rate, 

369 power=0.5) 

370 

371 model.compile(optimizer=tf.keras.optimizers.SGD( 

372 learning_rate=learning_rate_fn), 

373 loss='sparse_categorical_crossentropy', 

374 metrics=['accuracy']) 

375 

376 model.fit(data, labels, epochs=5) 

377 ``` 

378 

379 The learning rate schedule is also serializable and deserializable using 

380 `tf.keras.optimizers.schedules.serialize` and 

381 `tf.keras.optimizers.schedules.deserialize`. 

382 

383 Returns: 

384 A 1-arg callable learning rate schedule that takes the current optimizer 

385 step and outputs the decayed learning rate, a scalar `Tensor` of the same 

386 type as `initial_learning_rate`. 

387 """ 

388 

389 def __init__( 

390 self, 

391 initial_learning_rate, 

392 decay_steps, 

393 end_learning_rate=0.0001, 

394 power=1.0, 

395 cycle=False, 

396 name=None, 

397 ): 

398 """Applies a polynomial decay to the learning rate. 

399 

400 Args: 

401 initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a 

402 Python number. The initial learning rate. 

403 decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. 

404 Must be positive. See the decay computation above. 

405 end_learning_rate: A scalar `float32` or `float64` `Tensor` or a 

406 Python number. The minimal end learning rate. 

407 power: A scalar `float32` or `float64` `Tensor` or a 

408 Python number. The power of the polynomial. Defaults to `1.0`. 

409 cycle: A boolean, whether it should cycle beyond decay_steps. 

410 name: String. Optional name of the operation. Defaults to 

411 'PolynomialDecay'. 

412 """ 

413 super().__init__() 

414 

415 self.initial_learning_rate = initial_learning_rate 

416 self.decay_steps = decay_steps 

417 self.end_learning_rate = end_learning_rate 

418 self.power = power 

419 self.cycle = cycle 

420 self.name = name 

421 

422 def __call__(self, step): 

423 with tf.name_scope(self.name or "PolynomialDecay") as name: 

424 initial_learning_rate = tf.convert_to_tensor( 

425 self.initial_learning_rate, name="initial_learning_rate" 

426 ) 

427 dtype = initial_learning_rate.dtype 

428 end_learning_rate = tf.cast(self.end_learning_rate, dtype) 

429 power = tf.cast(self.power, dtype) 

430 

431 global_step_recomp = tf.cast(step, dtype) 

432 decay_steps_recomp = tf.cast(self.decay_steps, dtype) 

433 if self.cycle: 

434 # Find the first multiple of decay_steps that is bigger than 

435 # global_step. If global_step is zero set the multiplier to 1 

436 multiplier = tf.where( 

437 tf.equal(global_step_recomp, 0), 

438 1.0, 

439 tf.math.ceil(global_step_recomp / self.decay_steps), 

440 ) 

441 decay_steps_recomp = tf.multiply(decay_steps_recomp, multiplier) 

442 else: 

443 # Make sure that the global_step used is not bigger than 

444 # decay_steps. 

445 global_step_recomp = tf.minimum( 

446 global_step_recomp, decay_steps_recomp 

447 ) 

448 

449 p = tf.divide(global_step_recomp, decay_steps_recomp) 

450 return tf.add( 

451 tf.multiply( 

452 initial_learning_rate - end_learning_rate, 

453 tf.pow(1 - p, power), 

454 ), 

455 end_learning_rate, 

456 name=name, 

457 ) 

458 

459 def get_config(self): 

460 return { 

461 "initial_learning_rate": self.initial_learning_rate, 

462 "decay_steps": self.decay_steps, 

463 "end_learning_rate": self.end_learning_rate, 

464 "power": self.power, 

465 "cycle": self.cycle, 

466 "name": self.name, 

467 } 

468 

469 
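A numeric sketch of the sqrt-decay example above (values approximate, eager evaluation assumed):

```python
import tensorflow as tf

lr = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.1, decay_steps=10000,
    end_learning_rate=0.01, power=0.5)
# (0.1 - 0.01) * (1 - step / 10000) ** 0.5 + 0.01
print(float(lr(0)))      # 0.1
print(float(lr(2500)))   # ~0.088
print(float(lr(10000)))  # 0.01, and stays there since cycle=False
```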

470@keras_export("keras.optimizers.schedules.InverseTimeDecay") 

471class InverseTimeDecay(LearningRateSchedule): 

472 """A LearningRateSchedule that uses an inverse time decay schedule. 

473 

474 When training a model, it is often useful to lower the learning rate as 

475 the training progresses. This schedule applies the inverse decay function 

476 to an optimizer step, given a provided initial learning rate. 

477 It requires a `step` value to compute the decayed learning rate. You can 

478 just pass a TensorFlow variable that you increment at each training step. 

479 

480 The schedule is a 1-arg callable that produces a decayed learning 

481 rate when passed the current optimizer step. This can be useful for changing 

482 the learning rate value across different invocations of optimizer functions. 

483 It is computed as: 

484 

485 ```python 

486 def decayed_learning_rate(step): 

487 return initial_learning_rate / (1 + decay_rate * step / decay_steps)

488 ``` 

489 

490 or, if `staircase` is `True`, as: 

491 

492 ```python 

493 def decayed_learning_rate(step): 

494 return initial_learning_rate / (1 + decay_rate * floor(step / decay_steps))

495 ``` 

496 

497 You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` 

498 as the learning rate. 

499 Example: Fit a Keras model when decaying 1/t with a rate of 0.5: 

500 

501 ```python 

502 ... 

503 initial_learning_rate = 0.1 

504 decay_steps = 1.0 

505 decay_rate = 0.5 

506 learning_rate_fn = keras.optimizers.schedules.InverseTimeDecay( 

507 initial_learning_rate, decay_steps, decay_rate) 

508 

509 model.compile(optimizer=tf.keras.optimizers.SGD( 

510 learning_rate=learning_rate_fn), 

511 loss='sparse_categorical_crossentropy', 

512 metrics=['accuracy']) 

513 

514 model.fit(data, labels, epochs=5) 

515 ``` 

516 

517 Returns: 

518 A 1-arg callable learning rate schedule that takes the current optimizer 

519 step and outputs the decayed learning rate, a scalar `Tensor` of the same 

520 type as `initial_learning_rate`. 

521 """ 

522 

523 def __init__( 

524 self, 

525 initial_learning_rate, 

526 decay_steps, 

527 decay_rate, 

528 staircase=False, 

529 name=None, 

530 ): 

531 """Applies inverse time decay to the initial learning rate. 

532 

533 Args: 

534 initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a 

535 Python number. The initial learning rate. 

536 decay_steps: How often to apply decay. 

537 decay_rate: A Python number. The decay rate. 

538 staircase: Whether to apply decay in a discrete staircase, as opposed 

539 to continuous, fashion. 

540 name: String. Optional name of the operation. Defaults to 

541 'InverseTimeDecay'. 

542 """ 

543 super().__init__() 

544 

545 self.initial_learning_rate = initial_learning_rate 

546 self.decay_steps = decay_steps 

547 self.decay_rate = decay_rate 

548 self.staircase = staircase 

549 self.name = name 

550 

551 def __call__(self, step): 

552 with tf.name_scope(self.name or "InverseTimeDecay") as name: 

553 initial_learning_rate = tf.convert_to_tensor( 

554 self.initial_learning_rate, name="initial_learning_rate" 

555 ) 

556 dtype = initial_learning_rate.dtype 

557 decay_steps = tf.cast(self.decay_steps, dtype) 

558 decay_rate = tf.cast(self.decay_rate, dtype) 

559 

560 global_step_recomp = tf.cast(step, dtype) 

561 p = global_step_recomp / decay_steps 

562 if self.staircase: 

563 p = tf.floor(p) 

564 const = tf.cast(tf.constant(1), dtype) 

565 denom = tf.add(const, tf.multiply(decay_rate, p)) 

566 return tf.divide(initial_learning_rate, denom, name=name) 

567 

568 def get_config(self): 

569 return { 

570 "initial_learning_rate": self.initial_learning_rate, 

571 "decay_steps": self.decay_steps, 

572 "decay_rate": self.decay_rate, 

573 "staircase": self.staircase, 

574 "name": self.name, 

575 } 

576 

577 
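The 1/t example above works out as follows (values approximate, eager evaluation assumed):

```python
import tensorflow as tf

lr = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.1, decay_steps=1.0, decay_rate=0.5)
# 0.1 / (1 + 0.5 * step)
print(float(lr(0)))  # 0.1
print(float(lr(1)))  # ~0.0667
print(float(lr(4)))  # ~0.0333
```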

578@keras_export( 

579 "keras.optimizers.schedules.CosineDecay", "keras.experimental.CosineDecay" 

580) 

581class CosineDecay(LearningRateSchedule): 

582 """A LearningRateSchedule that uses a cosine decay with optional warmup. 

583 

584 See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), 

585 SGDR: Stochastic Gradient Descent with Warm Restarts. 

586 

587 For the idea of a linear warmup of our learning rate, 

588 see [Goyal et al.](https://arxiv.org/pdf/1706.02677.pdf). 

589 

590 When we begin training a model, we often want an initial increase in our 

591 learning rate followed by a decay. If `warmup_target` is not None, this

592 schedule applies a linear increase per optimizer step to our learning rate 

593 from `initial_learning_rate` to `warmup_target` for a duration of 

594 `warmup_steps`. Afterwards, it applies a cosine decay function taking our 

595 learning rate from `warmup_target` to `alpha` for a duration of 

596 `decay_steps`. If `warmup_target` is None we skip warmup and our decay 

597 will take our learning rate from `initial_learning_rate` to `alpha`. 

598 It requires a `step` value to compute the learning rate. You can 

599 just pass a TensorFlow variable that you increment at each training step. 

600 

601 The schedule is a 1-arg callable that produces a warmup followed by a 

602 decayed learning rate when passed the current optimizer step. This can be 

603 useful for changing the learning rate value across different invocations of 

604 optimizer functions. 

605 

606 Our warmup is computed as: 

607 

608 ```python 

609 def warmup_learning_rate(step): 

610 completed_fraction = step / warmup_steps 

611 total_delta = warmup_target - initial_learning_rate

612 return completed_fraction * total_delta + initial_learning_rate

613 ``` 

614 

615 And our decay is computed as: 

616 

617 ```python 

618 if warmup_target is None: 

619 initial_decay_lr = initial_learning_rate 

620 else: 

621 initial_decay_lr = warmup_target 

622 

623 def decayed_learning_rate(step): 

624 step = min(step, decay_steps) 

625 cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps)) 

626 decayed = (1 - alpha) * cosine_decay + alpha 

627 return initial_decay_lr * decayed 

628 ``` 

629 

630 Example usage without warmup: 

631 

632 ```python 

633 decay_steps = 1000 

634 initial_learning_rate = 0.1 

635 lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay( 

636 initial_learning_rate, decay_steps) 

637 ``` 

638 

639 Example usage with warmup: 

640 

641 ```python 

642 decay_steps = 1000 

643 initial_learning_rate = 0 

644 warmup_steps = 1000 

645 target_learning_rate = 0.1 

646 lr_warmup_decayed_fn = tf.keras.optimizers.schedules.CosineDecay( 

647 initial_learning_rate, decay_steps, warmup_target=target_learning_rate, 

648 warmup_steps=warmup_steps 

649 ) 

650 ``` 

651 

652 You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` 

653 as the learning rate. The learning rate schedule is also serializable and 

654 deserializable using `tf.keras.optimizers.schedules.serialize` and 

655 `tf.keras.optimizers.schedules.deserialize`. 

656 

657 Returns: 

658 A 1-arg callable learning rate schedule that takes the current optimizer 

659 step and outputs the decayed learning rate, a scalar `Tensor` of the same 

660 type as `initial_learning_rate`. 

661 """ 

662 

663 def __init__( 

664 self, 

665 initial_learning_rate, 

666 decay_steps, 

667 alpha=0.0, 

668 name=None, 

669 warmup_target=None, 

670 warmup_steps=0, 

671 ): 

672 """Applies cosine decay to the learning rate. 

673 

674 Args: 

675 initial_learning_rate: A scalar `float32` or `float64` `Tensor` or a 

676 Python int. The initial learning rate. 

677 decay_steps: A scalar `int32` or `int64` `Tensor` or a Python int. 

678 Number of steps to decay over. 

679 alpha: A scalar `float32` or `float64` `Tensor` or a Python int. 

680 Minimum learning rate value for decay as a fraction of 

681 `initial_learning_rate`. 

682 name: String. Optional name of the operation. Defaults to 

683 'CosineDecay'. 

684 warmup_target: None or a scalar `float32` or `float64` `Tensor` or a 

685 Python int. The target learning rate for our warmup phase. Will cast 

686 to the `initial_learning_rate` datatype. Setting to None will skip

687 warmup and begin the decay phase from `initial_learning_rate`.

688 Otherwise the scheduler will warm up from `initial_learning_rate` to

689 `warmup_target`.

690 warmup_steps: A scalar `int32` or `int64` `Tensor` or a Python int. 

691 Number of steps to warmup over. 

692 """ 

693 super().__init__() 

694 

695 self.initial_learning_rate = initial_learning_rate 

696 self.decay_steps = decay_steps 

697 self.alpha = alpha 

698 self.name = name 

699 self.warmup_steps = warmup_steps 

700 self.warmup_target = warmup_target 

701 

702 def _decay_function(self, step, decay_steps, decay_from_lr, dtype): 

703 with tf.name_scope(self.name or "CosineDecay"): 

704 completed_fraction = step / decay_steps 

705 tf_pi = tf.constant(math.pi, dtype=dtype) 

706 cosine_decayed = 0.5 * (1.0 + tf.cos(tf_pi * completed_fraction)) 

707 decayed = (1 - self.alpha) * cosine_decayed + self.alpha 

708 return tf.multiply(decay_from_lr, decayed) 

709 

710 def _warmup_function( 

711 self, step, warmup_steps, warmup_target, initial_learning_rate 

712 ): 

713 with tf.name_scope(self.name or "CosineDecay"): 

714 completed_fraction = step / warmup_steps 

715 total_step_delta = warmup_target - initial_learning_rate 

716 return total_step_delta * completed_fraction + initial_learning_rate 

717 

718 def __call__(self, step): 

719 with tf.name_scope(self.name or "CosineDecay"): 

720 initial_learning_rate = tf.convert_to_tensor( 

721 self.initial_learning_rate, name="initial_learning_rate" 

722 ) 

723 dtype = initial_learning_rate.dtype 

724 decay_steps = tf.cast(self.decay_steps, dtype) 

725 global_step_recomp = tf.cast(step, dtype) 

726 

727 if self.warmup_target is None: 

728 global_step_recomp = tf.minimum(global_step_recomp, decay_steps) 

729 return self._decay_function( 

730 global_step_recomp, 

731 decay_steps, 

732 initial_learning_rate, 

733 dtype, 

734 ) 

735 

736 warmup_target = tf.cast(self.warmup_target, dtype) 

737 warmup_steps = tf.cast(self.warmup_steps, dtype) 

738 

739 global_step_recomp = tf.minimum( 

740 global_step_recomp, decay_steps + warmup_steps 

741 ) 

742 

743 return tf.cond( 

744 global_step_recomp < warmup_steps, 

745 lambda: self._warmup_function( 

746 global_step_recomp, 

747 warmup_steps, 

748 warmup_target, 

749 initial_learning_rate, 

750 ), 

751 lambda: self._decay_function( 

752 global_step_recomp - warmup_steps, 

753 decay_steps, 

754 warmup_target, 

755 dtype, 

756 ), 

757 ) 

758 

759 def get_config(self): 

760 return { 

761 "initial_learning_rate": self.initial_learning_rate, 

762 "decay_steps": self.decay_steps, 

763 "alpha": self.alpha, 

764 "name": self.name, 

765 "warmup_target": self.warmup_target, 

766 "warmup_steps": self.warmup_steps, 

767 } 

768 

769 
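Putting the warmup and decay pieces above together, a sketch of the docstring's warmup example (values approximate, eager evaluation assumed):

```python
import tensorflow as tf

lr = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=0.0, decay_steps=1000,
    warmup_target=0.1, warmup_steps=1000)
print(float(lr(500)))   # 0.05 - halfway through the linear warmup
print(float(lr(1000)))  # 0.1  - warmup complete, cosine decay starts
print(float(lr(1500)))  # 0.05 - halfway through the cosine decay
print(float(lr(2000)))  # 0.0  - fully decayed (alpha=0)
```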

770@keras_export( 

771 "keras.optimizers.schedules.CosineDecayRestarts", 

772 "keras.experimental.CosineDecayRestarts", 

773) 

774class CosineDecayRestarts(LearningRateSchedule): 

775 """A LearningRateSchedule that uses a cosine decay schedule with restarts. 

776 

777 See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), 

778 SGDR: Stochastic Gradient Descent with Warm Restarts. 

779 

780 When training a model, it is often useful to lower the learning rate as 

781 the training progresses. This schedule applies a cosine decay function with 

782 restarts to an optimizer step, given a provided initial learning rate. 

783 It requires a `step` value to compute the decayed learning rate. You can 

784 just pass a TensorFlow variable that you increment at each training step. 

785 

786 The schedule is a 1-arg callable that produces a decayed learning 

787 rate when passed the current optimizer step. This can be useful for changing 

788 the learning rate value across different invocations of optimizer functions. 

789 

790 The learning rate multiplier first decays 

791 from 1 to `alpha` for `first_decay_steps` steps. Then, a warm 

792 restart is performed. Each new warm restart runs for `t_mul` times more

793 steps and starts from `m_mul` times the previous period's initial learning rate.

794 

795 Example usage: 

796 ```python 

797 first_decay_steps = 1000 

798 lr_decayed_fn = ( 

799 tf.keras.optimizers.schedules.CosineDecayRestarts( 

800 initial_learning_rate, 

801 first_decay_steps)) 

802 ``` 

803 

804 You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` 

805 as the learning rate. The learning rate schedule is also serializable and 

806 deserializable using `tf.keras.optimizers.schedules.serialize` and 

807 `tf.keras.optimizers.schedules.deserialize`. 

808 

809 Returns: 

810 A 1-arg callable learning rate schedule that takes the current optimizer 

811 step and outputs the decayed learning rate, a scalar `Tensor` of the same 

812 type as `initial_learning_rate`. 

813 """ 

814 

815 def __init__( 

816 self, 

817 initial_learning_rate, 

818 first_decay_steps, 

819 t_mul=2.0, 

820 m_mul=1.0, 

821 alpha=0.0, 

822 name=None, 

823 ): 

824 """Applies cosine decay with restarts to the learning rate. 

825 

826 Args: 

827 initial_learning_rate: A scalar `float32` or `float64` Tensor or a 

828 Python number. The initial learning rate. 

829 first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python 

830 number. Number of steps to decay over. 

831 t_mul: A scalar `float32` or `float64` `Tensor` or a Python number. 

832 Used to derive the number of iterations in the i-th period. 

833 m_mul: A scalar `float32` or `float64` `Tensor` or a Python number. 

834 Used to derive the initial learning rate of the i-th period. 

835 alpha: A scalar `float32` or `float64` Tensor or a Python number. 

836 Minimum learning rate value as a fraction of the 

837 initial_learning_rate. 

838 name: String. Optional name of the operation. Defaults to 'SGDRDecay'. 

839 """ 

840 super().__init__() 

841 

842 self.initial_learning_rate = initial_learning_rate 

843 self.first_decay_steps = first_decay_steps 

844 self._t_mul = t_mul 

845 self._m_mul = m_mul 

846 self.alpha = alpha 

847 self.name = name 

848 

849 def __call__(self, step): 

850 with tf.name_scope(self.name or "SGDRDecay") as name: 

851 initial_learning_rate = tf.convert_to_tensor( 

852 self.initial_learning_rate, name="initial_learning_rate" 

853 ) 

854 dtype = initial_learning_rate.dtype 

855 first_decay_steps = tf.cast(self.first_decay_steps, dtype) 

856 alpha = tf.cast(self.alpha, dtype) 

857 t_mul = tf.cast(self._t_mul, dtype) 

858 m_mul = tf.cast(self._m_mul, dtype) 

859 

860 global_step_recomp = tf.cast(step, dtype) 

861 completed_fraction = global_step_recomp / first_decay_steps 

862 

863 def compute_step(completed_fraction, geometric=False): 

864 """Helper for `cond` operation.""" 

865 if geometric: 

866 i_restart = tf.floor( 

867 tf.math.log(1.0 - completed_fraction * (1.0 - t_mul)) 

868 / tf.math.log(t_mul) 

869 ) 

870 

871 sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul) 

872 completed_fraction = ( 

873 completed_fraction - sum_r 

874 ) / t_mul**i_restart 

875 

876 else: 

877 i_restart = tf.floor(completed_fraction) 

878 completed_fraction -= i_restart 

879 

880 return i_restart, completed_fraction 

881 

882 i_restart, completed_fraction = tf.cond( 

883 tf.equal(t_mul, 1.0), 

884 lambda: compute_step(completed_fraction, geometric=False), 

885 lambda: compute_step(completed_fraction, geometric=True), 

886 ) 

887 

888 m_fac = m_mul**i_restart 

889 cosine_decayed = ( 

890 0.5 

891 * m_fac 

892 * ( 

893 1.0 

894 + tf.cos( 

895 tf.constant(math.pi, dtype=dtype) * completed_fraction 

896 ) 

897 ) 

898 ) 

899 decayed = (1 - alpha) * cosine_decayed + alpha 

900 

901 return tf.multiply(initial_learning_rate, decayed, name=name) 

902 

903 def get_config(self): 

904 return { 

905 "initial_learning_rate": self.initial_learning_rate, 

906 "first_decay_steps": self.first_decay_steps, 

907 "t_mul": self._t_mul, 

908 "m_mul": self._m_mul, 

909 "alpha": self.alpha, 

910 "name": self.name, 

911 } 

912 

913 
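A rough sketch of how the restarts play out with the defaults (`t_mul=2.0`, `m_mul=1.0`, `alpha=0.0`); values approximate, eager evaluation assumed:

```python
import tensorflow as tf

lr = tf.keras.optimizers.schedules.CosineDecayRestarts(
    initial_learning_rate=0.1, first_decay_steps=1000)
# The first period lasts 1000 steps, the next 2000, then 4000, ...
print(float(lr(0)))     # 0.1  - start of the first period
print(float(lr(999)))   # ~0.0 - end of the first period
print(float(lr(1000)))  # 0.1  - warm restart (m_mul=1.0 restores the peak)
print(float(lr(2000)))  # 0.05 - halfway through the second, 2000-step period
```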

914# Note: this code is still used by V1 APIs. 

915class LinearCosineDecay(LearningRateSchedule): 

916 """A LearningRateSchedule that uses a linear cosine decay schedule. 

917 

918 See [Bello et al., ICML2017] Neural Optimizer Search with RL. 

919 https://arxiv.org/abs/1709.07417 

920 

921 For the idea of warm starts here controlled by `num_periods`, 

922 see [Loshchilov & Hutter, ICLR2016] SGDR: Stochastic Gradient Descent 

923 with Warm Restarts. https://arxiv.org/abs/1608.03983 

924 

925 Note that linear cosine decay is more aggressive than cosine decay and 

926 larger initial learning rates can typically be used. 

927 

928 When training a model, it is often recommended to lower the learning rate as 

929 the training progresses. This schedule applies a linear cosine decay 

930 function to an optimizer step, given a provided initial learning rate. 

931 It requires a `step` value to compute the decayed learning rate. You can 

932 just pass a TensorFlow variable that you increment at each training step. 

933 

934 The schedule is a 1-arg callable that produces a decayed learning 

935 rate when passed the current optimizer step. This can be useful for changing 

936 the learning rate value across different invocations of optimizer functions. 

937 It is computed as: 

938 

939 ```python 

940 def decayed_learning_rate(step): 

941 step = min(step, decay_steps) 

942 linear_decay = (decay_steps - step) / decay_steps 

943 cosine_decay = 0.5 * ( 

944 1 + cos(pi * 2 * num_periods * step / decay_steps)) 

945 decayed = (alpha + linear_decay) * cosine_decay + beta 

946 return initial_learning_rate * decayed 

947 ``` 

948 

949 Example usage: 

950 ```python 

951 decay_steps = 1000 

952 lr_decayed_fn = ( 

953 tf.keras.experimental.LinearCosineDecay( 

954 initial_learning_rate, decay_steps)) 

955 ``` 

956 

957 You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` 

958 as the learning rate. The learning rate schedule is also serializable and 

959 deserializable using `tf.keras.optimizers.schedules.serialize` and 

960 `tf.keras.optimizers.schedules.deserialize`. 

961 

962 Returns: 

963 A 1-arg callable learning rate schedule that takes the current optimizer 

964 step and outputs the decayed learning rate, a scalar `Tensor` of the same 

965 type as `initial_learning_rate`. 

966 """ 

967 

968 def __init__( 

969 self, 

970 initial_learning_rate, 

971 decay_steps, 

972 num_periods=0.5, 

973 alpha=0.0, 

974 beta=0.001, 

975 name=None, 

976 ): 

977 """Applies linear cosine decay to the learning rate. 

978 

979 Args: 

980 initial_learning_rate: A scalar `float32` or `float64` Tensor or a 

981 Python number. The initial learning rate. 

982 decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. 

983 Number of steps to decay over. 

984 num_periods: Number of periods in the cosine part of the decay. 

985 See computation above. 

986 alpha: See computation above. 

987 beta: See computation above. 

988 name: String. Optional name of the operation. Defaults to 

989 'LinearCosineDecay'. 

990 """ 

991 super().__init__() 

992 

993 self.initial_learning_rate = initial_learning_rate 

994 self.decay_steps = decay_steps 

995 self.num_periods = num_periods 

996 self.alpha = alpha 

997 self.beta = beta 

998 self.name = name 

999 

1000 def __call__(self, step): 

1001 with tf.name_scope(self.name or "LinearCosineDecay") as name: 

1002 initial_learning_rate = tf.convert_to_tensor( 

1003 self.initial_learning_rate, name="initial_learning_rate" 

1004 ) 

1005 dtype = initial_learning_rate.dtype 

1006 decay_steps = tf.cast(self.decay_steps, dtype) 

1007 num_periods = tf.cast(self.num_periods, dtype) 

1008 alpha = tf.cast(self.alpha, dtype) 

1009 beta = tf.cast(self.beta, dtype) 

1010 

1011 global_step_recomp = tf.cast(step, dtype) 

1012 global_step_recomp = tf.minimum(global_step_recomp, decay_steps) 

1013 linear_decayed = (decay_steps - global_step_recomp) / decay_steps 

1014 completed_fraction = global_step_recomp / decay_steps 

1015 fraction = 2.0 * num_periods * completed_fraction 

1016 cosine_decayed = 0.5 * ( 

1017 1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction) 

1018 ) 

1019 

1020 linear_cosine_decayed = ( 

1021 alpha + linear_decayed 

1022 ) * cosine_decayed + beta 

1023 return tf.multiply( 

1024 initial_learning_rate, linear_cosine_decayed, name=name 

1025 ) 

1026 

1027 def get_config(self): 

1028 return { 

1029 "initial_learning_rate": self.initial_learning_rate, 

1030 "decay_steps": self.decay_steps, 

1031 "num_periods": self.num_periods, 

1032 "alpha": self.alpha, 

1033 "beta": self.beta, 

1034 "name": self.name, 

1035 } 

1036 

1037 
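With the defaults `num_periods=0.5`, `alpha=0.0`, `beta=0.001`, the formula above sweeps from roughly `initial_learning_rate` down to about `beta * initial_learning_rate`. A brief sketch, assuming the `tf.keras.experimental.LinearCosineDecay` export shown in the docstring is available in your TF version:

```python
import tensorflow as tf

lr = tf.keras.experimental.LinearCosineDecay(
    initial_learning_rate=0.1, decay_steps=1000)
print(float(lr(0)))     # ~0.1    (0.1 * ((0 + 1.0) * 1.0 + 0.001))
print(float(lr(1000)))  # ~0.0001 (0.1 * ((0 + 0.0) * 0.0 + 0.001))
```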

1038# Note: this code is still used by V1 APIs. 

1039class NoisyLinearCosineDecay(LearningRateSchedule): 

1040 """A LearningRateSchedule that uses a noisy linear cosine decay schedule. 

1041 

1042 See [Bello et al., ICML2017] Neural Optimizer Search with RL. 

1043 https://arxiv.org/abs/1709.07417 

1044 

1045 For the idea of warm starts here controlled by `num_periods`, 

1046 see [Loshchilov & Hutter, ICLR2016] SGDR: Stochastic Gradient Descent 

1047 with Warm Restarts. https://arxiv.org/abs/1608.03983 

1048 

1049 Note that linear cosine decay is more aggressive than cosine decay and 

1050 larger initial learning rates can typically be used. 

1051 

1052 When training a model, it is often recommended to lower the learning rate as 

1053 the training progresses. This schedule applies a noisy linear cosine decay 

1054 function to an optimizer step, given a provided initial learning rate. 

1055 It requires a `step` value to compute the decayed learning rate. You can 

1056 just pass a TensorFlow variable that you increment at each training step. 

1057 

1058 The schedule is a 1-arg callable that produces a decayed learning 

1059 rate when passed the current optimizer step. This can be useful for changing 

1060 the learning rate value across different invocations of optimizer functions. 

1061 It is computed as: 

1062 

1063 ```python 

1064 def decayed_learning_rate(step): 

1065 step = min(step, decay_steps) 

1066 linear_decay = (decay_steps - step) / decay_steps

1067 cosine_decay = 0.5 * ( 

1068 1 + cos(pi * 2 * num_periods * step / decay_steps)) 

1069 decayed = (alpha + linear_decay + eps_t) * cosine_decay + beta 

1070 return initial_learning_rate * decayed 

1071 ``` 

1072 where `eps_t` is 0-centered Gaussian noise with variance

1073 `initial_variance / (1 + global_step) ** variance_decay`.

1074 

1075 Example usage: 

1076 ```python 

1077 decay_steps = 1000 

1078 lr_decayed_fn = ( 

1079 tf.keras.experimental.NoisyLinearCosineDecay( 

1080 initial_learning_rate, decay_steps)) 

1081 ``` 

1082 

1083 You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` 

1084 as the learning rate. The learning rate schedule is also serializable and 

1085 deserializable using `tf.keras.optimizers.schedules.serialize` and 

1086 `tf.keras.optimizers.schedules.deserialize`. 

1087 

1088 Returns: 

1089 A 1-arg callable learning rate schedule that takes the current optimizer 

1090 step and outputs the decayed learning rate, a scalar `Tensor` of the same 

1091 type as `initial_learning_rate`. 

1092 """ 

1093 

1094 def __init__( 

1095 self, 

1096 initial_learning_rate, 

1097 decay_steps, 

1098 initial_variance=1.0, 

1099 variance_decay=0.55, 

1100 num_periods=0.5, 

1101 alpha=0.0, 

1102 beta=0.001, 

1103 seed=None, 

1104 name=None, 

1105 ): 

1106 """Applies noisy linear cosine decay to the learning rate. 

1107 

1108 Args: 

1109 initial_learning_rate: A scalar `float32` or `float64` Tensor or a 

1110 Python number. The initial learning rate. 

1111 decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. 

1112 Number of steps to decay over. 

1113 initial_variance: initial variance for the noise. See computation 

1114 above. 

1115 variance_decay: decay for the noise's variance. See computation above. 

1116 num_periods: Number of periods in the cosine part of the decay. 

1117 See computation above. 

1118 alpha: See computation above. 

1119 beta: See computation above. 

1120 seed: Integer, optional random seed to enable deterministic behavior. 

1121 name: String. Optional name of the operation. Defaults to 

1122 'NoisyLinearCosineDecay'. 

1123 """ 

1124 super().__init__() 

1125 

1126 self.initial_learning_rate = initial_learning_rate 

1127 self.decay_steps = decay_steps 

1128 self.initial_variance = initial_variance 

1129 self.variance_decay = variance_decay 

1130 self.num_periods = num_periods 

1131 self.alpha = alpha 

1132 self.beta = beta 

1133 self.seed = seed 

1134 self.name = name 

1135 self._random_generator = backend.RandomGenerator(seed) 

1136 

1137 def __call__(self, step): 

1138 with tf.name_scope(self.name or "NoisyLinearCosineDecay") as name: 

1139 initial_learning_rate = tf.convert_to_tensor( 

1140 self.initial_learning_rate, name="initial_learning_rate" 

1141 ) 

1142 dtype = initial_learning_rate.dtype 

1143 decay_steps = tf.cast(self.decay_steps, dtype) 

1144 initial_variance = tf.cast(self.initial_variance, dtype) 

1145 variance_decay = tf.cast(self.variance_decay, dtype) 

1146 num_periods = tf.cast(self.num_periods, dtype) 

1147 alpha = tf.cast(self.alpha, dtype) 

1148 beta = tf.cast(self.beta, dtype) 

1149 

1150 global_step_recomp = tf.cast(step, dtype) 

1151 global_step_recomp = tf.minimum(global_step_recomp, decay_steps) 

1152 linear_decayed = (decay_steps - global_step_recomp) / decay_steps 

1153 variance = initial_variance / ( 

1154 tf.pow(1.0 + global_step_recomp, variance_decay) 

1155 ) 

1156 std = tf.sqrt(variance) 

1157 noisy_linear_decayed = ( 

1158 linear_decayed 

1159 + self._random_generator.random_normal( 

1160 linear_decayed.shape, stddev=std 

1161 ) 

1162 ) 

1163 

1164 completed_fraction = global_step_recomp / decay_steps 

1165 fraction = 2.0 * num_periods * completed_fraction 

1166 cosine_decayed = 0.5 * ( 

1167 1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction) 

1168 ) 

1169 noisy_linear_cosine_decayed = ( 

1170 alpha + noisy_linear_decayed 

1171 ) * cosine_decayed + beta 

1172 

1173 return tf.multiply( 

1174 initial_learning_rate, noisy_linear_cosine_decayed, name=name 

1175 ) 

1176 

1177 def get_config(self): 

1178 return { 

1179 "initial_learning_rate": self.initial_learning_rate, 

1180 "decay_steps": self.decay_steps, 

1181 "initial_variance": self.initial_variance, 

1182 "variance_decay": self.variance_decay, 

1183 "num_periods": self.num_periods, 

1184 "alpha": self.alpha, 

1185 "beta": self.beta, 

1186 "seed": self.seed, 

1187 "name": self.name, 

1188 } 

1189 

1190 
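A short construction sketch. Per the docstring, the noise variance is `initial_variance / (1 + step) ** variance_decay`, so it fades as training progresses, and passing `seed` makes the noise reproducible. This again assumes the `tf.keras.experimental` export named in the docstring is available in your TF version:

```python
import tensorflow as tf

lr = tf.keras.experimental.NoisyLinearCosineDecay(
    initial_learning_rate=0.1, decay_steps=1000,
    initial_variance=1.0, variance_decay=0.55, seed=42)
# Same overall shape as LinearCosineDecay, plus Gaussian noise on the
# linear term; each evaluation draws fresh noise, so repeated calls at
# the same step can differ slightly.
print(float(lr(100)))
```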

1191@keras_export("keras.optimizers.schedules.serialize") 

1192def serialize(learning_rate_schedule, use_legacy_format=False): 

1193 """Serializes a `LearningRateSchedule` into a JSON-compatible dict. 

1194 

1195 Args: 

1196 learning_rate_schedule: The `LearningRateSchedule` object to serialize. 

1197 

1198 Returns: 

1199 A JSON-serializable dict representing the object's config. 

1200 

1201 Example: 

1202 

1203 >>> lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( 

1204 ... 0.1, decay_steps=100000, decay_rate=0.96, staircase=True) 

1205 >>> tf.keras.optimizers.schedules.serialize(lr_schedule) 

1206 {'module': 'keras.optimizers.schedules', 

1207 'class_name': 'ExponentialDecay', 'config': {...}, 

1208 'registered_name': None} 

1209 """ 

1210 if use_legacy_format: 

1211 return legacy_serialization.serialize_keras_object( 

1212 learning_rate_schedule 

1213 ) 

1214 

1215 return serialization_lib.serialize_keras_object(learning_rate_schedule) 

1216 

1217 

1218@keras_export("keras.optimizers.schedules.deserialize") 

1219def deserialize(config, custom_objects=None, use_legacy_format=False): 

1220 """Instantiates a `LearningRateSchedule` object from a serialized form. 

1221 

1222 Args: 

1223 config: The serialized form of the `LearningRateSchedule`. 

1224 Dictionary of the form {'class_name': str, 'config': dict}. 

1225 custom_objects: A dictionary mapping class names (or function names) of 

1226 custom (non-Keras) objects to classes/functions.

1227 

1228 Returns: 

1229 A `LearningRateSchedule` object. 

1230 

1231 Example: 

1232 

1233 ```python 

1234 # Configuration for PolynomialDecay 

1235 config = { 

1236 'class_name': 'PolynomialDecay', 

1237 'config': {'cycle': False, 

1238 'decay_steps': 10000, 

1239 'end_learning_rate': 0.01, 

1240 'initial_learning_rate': 0.1, 

1241 'name': None, 

1242 'power': 0.5}} 

1243 lr_schedule = tf.keras.optimizers.schedules.deserialize(config) 

1244 ``` 

1245 """ 

1246 if use_legacy_format: 

1247 return legacy_serialization.deserialize_keras_object( 

1248 config, 

1249 module_objects=globals(), 

1250 custom_objects=custom_objects, 

1251 printable_module_name="decay", 

1252 ) 

1253 

1254 return serialization_lib.deserialize_keras_object( 

1255 config, 

1256 module_objects=globals(), 

1257 custom_objects=custom_objects, 

1258 printable_module_name="decay", 

1259 ) 

1260
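Finally, a minimal round-trip sketch combining `serialize` and `deserialize` from above:

```python
import tensorflow as tf

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.1, decay_steps=100000, decay_rate=0.96, staircase=True)
config = tf.keras.optimizers.schedules.serialize(lr_schedule)
restored = tf.keras.optimizers.schedules.deserialize(config)
assert isinstance(restored, tf.keras.optimizers.schedules.ExponentialDecay)
assert restored.get_config() == lr_schedule.get_config()
```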