# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various learning rate decay functions."""

import functools

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_conversion
from tensorflow.python.keras.optimizer_v2 import learning_rate_schedule
from tensorflow.python.ops import math_ops
from tensorflow.python.util import nest
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.exponential_decay"])
def exponential_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
  """Applies exponential decay to the learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an exponential decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate *
                          decay_rate ^ (global_step / decay_steps)
  ```

  If the argument `staircase` is `True`, then `global_step / decay_steps` is an
  integer division and the decayed learning rate follows a staircase function.

  Example: decay every 100000 steps with a base of 0.96:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  starter_learning_rate = 0.1
  learning_rate = tf.compat.v1.train.exponential_decay(
      starter_learning_rate, global_step, 100000, 0.96, staircase=True)
  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```
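
  As a rough hand-worked check of the staircase schedule above (an
  illustration only, not part of the API), the rate stays constant within
  each 100000-step interval:

  ```python
  # Plain-Python evaluation of the formula with staircase=True.
  for step in [0, 100000, 200000]:
    print(step, 0.1 * 0.96 ** (step // 100000))
  # -> 0.1, 0.096, ~0.0922
  ```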

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation. Must not be negative.
    decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Must
      be positive. See the decay computation above.
    decay_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The decay rate.
    staircase: Boolean. If `True`, decay the learning rate at discrete
      intervals.
    name: String. Optional name of the operation. Defaults to
      'ExponentialDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
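
  For example, under eager execution the returned callable can be handed
  directly to a `tf.compat.v1.train` optimizer, which accepts a callable
  learning rate and re-evaluates it as `global_step` advances (a minimal
  sketch; the surrounding setup is assumed):

  ```python
  lr_fn = tf.compat.v1.train.exponential_decay(
      0.1, global_step, 100000, 0.96, staircase=True)
  optimizer = tf.compat.v1.train.GradientDescentOptimizer(lr_fn)
  ```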
  """
  decayed_lr = learning_rate_schedule.ExponentialDecay(
      learning_rate, decay_steps, decay_rate, staircase=staircase, name=name)
  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr


@tf_export(v1=["train.piecewise_constant_decay", "train.piecewise_constant"])
def piecewise_constant(x, boundaries, values, name=None):
  """Piecewise constant from boundaries and interval values.

  Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5
  for the next 10000 steps, and 0.1 for any additional steps.

  ```python
  global_step = tf.Variable(0, trainable=False)
  boundaries = [100000, 110000]
  values = [1.0, 0.5, 0.1]
  learning_rate = tf.compat.v1.train.piecewise_constant(
      global_step, boundaries, values)

  # Later, whenever we perform an optimization step, we increment global_step.
  ```
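
  The boundary semantics can be sketched in plain Python to make them
  concrete (an illustration only, not how the op is implemented):

  ```python
  import bisect

  def piecewise(step, boundaries=(100000, 110000), values=(1.0, 0.5, 0.1)):
    # values[i] applies while step <= boundaries[i]; values[-1] afterwards.
    return values[bisect.bisect_left(boundaries, step)]

  piecewise(100000)  # 1.0
  piecewise(100001)  # 0.5
  piecewise(120000)  # 0.1
  ```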

  Args:
    x: A 0-D scalar `Tensor`. Must be one of the following types: `float32`,
      `float64`, `uint8`, `int8`, `int16`, `int32`, `int64`.
    boundaries: A list of `Tensor`s or `int`s or `float`s with strictly
      increasing entries, and with all elements having the same type as `x`.
    values: A list of `Tensor`s or `float`s or `int`s that specifies the values
      for the intervals defined by `boundaries`. It should have one more
      element than `boundaries`, and all elements should have the same type.
    name: A string. Optional name of the operation. Defaults to
      'PiecewiseConstant'.

  Returns:
    A 0-D Tensor. Its value is `values[0]` when `x <= boundaries[0]`,
    `values[1]` when `x > boundaries[0]` and `x <= boundaries[1]`, ...,
    and `values[-1]` when `x > boundaries[-1]`.

  Raises:
    ValueError: if the types of `x` and `boundaries` do not match, if the types
      of all `values` do not match, or if the number of elements in the lists
      does not match.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
  """
  boundaries = nest.map_structure(
      tensor_conversion.convert_to_tensor_v2_with_dispatch,
      nest.flatten(boundaries),
  )
  values = nest.map_structure(
      tensor_conversion.convert_to_tensor_v2_with_dispatch, nest.flatten(values)
  )
  x_recomp = tensor_conversion.convert_to_tensor_v2_with_dispatch(x)
  # Avoid explicit conversion to x's dtype. This could result in faulty
  # comparisons, for example if floats are converted to integers.
  for i, b in enumerate(boundaries):
    if b.dtype.base_dtype != x_recomp.dtype.base_dtype:
      # We can promote int32 boundaries to int64 without loss of precision.
      # This covers the most common case where the user passes in boundaries
      # as an array of Python integers.
      if (b.dtype.base_dtype == dtypes.int32 and
          x_recomp.dtype.base_dtype == dtypes.int64):
        b = math_ops.cast(b, x_recomp.dtype.base_dtype)
        boundaries[i] = b
      else:
        raise ValueError(
            "Boundaries (%s) must have the same dtype as x (%s)." %
            (b.dtype.base_dtype, x_recomp.dtype.base_dtype))
  for v in values[1:]:
    if v.dtype.base_dtype != values[0].dtype.base_dtype:
      raise ValueError(
          "Values must have elements all with the same dtype (%s vs %s)." %
          (values[0].dtype.base_dtype, v.dtype.base_dtype))
  decayed_lr = learning_rate_schedule.PiecewiseConstantDecay(
      boundaries, values, name=name)
  if not context.executing_eagerly():
    decayed_lr = decayed_lr(x)
  else:
    decayed_lr = functools.partial(decayed_lr, x)
  return decayed_lr


@tf_export(v1=["train.polynomial_decay"])
def polynomial_decay(learning_rate,
                     global_step,
                     decay_steps,
                     end_learning_rate=0.0001,
                     power=1.0,
                     cycle=False,
                     name=None):
  """Applies a polynomial decay to the learning rate.

  It is commonly observed that a monotonically decreasing learning rate, whose
  degree of change is carefully chosen, results in a better performing model.
  This function applies a polynomial decay function to a provided initial
  `learning_rate` to reach an `end_learning_rate` in the given `decay_steps`.

  It requires a `global_step` value to compute the decayed learning rate. You
  can just pass a TensorFlow variable that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  global_step = min(global_step, decay_steps)
  decayed_learning_rate = (learning_rate - end_learning_rate) *
                          (1 - global_step / decay_steps) ^ (power) +
                          end_learning_rate
  ```

  If `cycle` is True then a multiple of `decay_steps` is used, the first one
  that is bigger than `global_step`.

  ```python
  decay_steps = decay_steps * ceil(global_step / decay_steps)
  decayed_learning_rate = (learning_rate - end_learning_rate) *
                          (1 - global_step / decay_steps) ^ (power) +
                          end_learning_rate
  ```

  Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5):

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  starter_learning_rate = 0.1
  end_learning_rate = 0.01
  decay_steps = 10000
  learning_rate = tf.compat.v1.train.polynomial_decay(
      starter_learning_rate, global_step, decay_steps, end_learning_rate,
      power=0.5)
  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```
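
  A rough hand-worked check of the sqrt schedule above (an illustration only):

  ```python
  # (0.1 - 0.01) * (1 - step / 10000) ** 0.5 + 0.01
  # step     0 -> 0.1
  # step  5000 -> ~0.0736
  # step 10000 -> 0.01
  ```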

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation. Must not be negative.
    decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Must
      be positive. See the decay computation above.
    end_learning_rate: A scalar `float32` or `float64` `Tensor` or a Python
      number. The minimal end learning rate.
    power: A scalar `float32` or `float64` `Tensor` or a Python number. The
      power of the polynomial. Defaults to linear, 1.0.
    cycle: A boolean, whether or not it should cycle beyond decay_steps.
    name: String. Optional name of the operation. Defaults to
      'PolynomialDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
  """
  decayed_lr = learning_rate_schedule.PolynomialDecay(
      learning_rate,
      decay_steps,
      end_learning_rate=end_learning_rate,
      power=power,
      cycle=cycle,
      name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr


@tf_export(v1=["train.natural_exp_decay"])
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an exponential decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
                                              decay_steps)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * floor(global_step /
                                              decay_steps))
  ```

  Example: decay exponentially with a decay rate of 0.5:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.compat.v1.train.natural_exp_decay(
      learning_rate, global_step, decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```
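
  A rough hand-worked check of the continuous schedule above (an illustration
  only):

  ```python
  # 0.1 * exp(-0.5 * step / 5)
  # step  0 -> 0.1
  # step  5 -> ~0.0607
  # step 10 -> ~0.0368
  ```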

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number. Global step to use for the decay computation.
      Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number. The decay rate.
    staircase: Whether to apply decay in a discrete staircase fashion, as
      opposed to continuously.
    name: String. Optional name of the operation. Defaults to
      'ExponentialTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
  """
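  # Natural exponential decay, learning_rate * exp(-decay_rate * global_step /
  # decay_steps), is expressed below as standard exponential decay with a base
  # of exp(-decay_rate), since exp(-decay_rate) ** (global_step / decay_steps)
  # equals exp(-decay_rate * global_step / decay_steps).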
  natural_exp_rate = math_ops.exp(math_ops.negative(decay_rate))
  decayed_lr = learning_rate_schedule.ExponentialDecay(
      learning_rate,
      decay_steps,
      natural_exp_rate,
      staircase=staircase,
      name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr


@tf_export(v1=["train.inverse_time_decay"])
def inverse_time_decay(learning_rate,
                       global_step,
                       decay_steps,
                       decay_rate,
                       staircase=False,
                       name=None):
  """Applies inverse time decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an inverse decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step /
                                           decay_steps)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step /
                                           decay_steps))
  ```

  Example: decay 1/t with a rate of 0.5:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 1.0
  decay_rate = 0.5
  learning_rate = tf.compat.v1.train.inverse_time_decay(
      learning_rate, global_step, decay_steps, decay_rate)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```
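
  A rough hand-worked check of the continuous schedule above (an illustration
  only):

  ```python
  # 0.1 / (1 + 0.5 * step / 1.0)
  # step 0 -> 0.1
  # step 1 -> ~0.0667
  # step 2 -> 0.05
  ```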

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number. Global step to use for the decay computation.
      Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number. The decay rate.
    staircase: Whether to apply decay in a discrete staircase fashion, as
      opposed to continuously.
    name: String. Optional name of the operation. Defaults to
      'InverseTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
  """
  decayed_lr = learning_rate_schedule.InverseTimeDecay(
      learning_rate, decay_steps, decay_rate, staircase=staircase, name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr


@tf_export(v1=["train.cosine_decay"])
def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0,
                 name=None):
  """Applies cosine decay to the learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies a cosine decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  global_step = min(global_step, decay_steps)
  cosine_decay = 0.5 * (1 + cos(pi * global_step / decay_steps))
  decayed = (1 - alpha) * cosine_decay + alpha
  decayed_learning_rate = learning_rate * decayed
  ```

  Example usage:

  ```python
  decay_steps = 1000
  lr_decayed = cosine_decay(learning_rate, global_step, decay_steps)
  ```
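
  A rough hand-worked check of the schedule above with `alpha=0.0` (an
  illustration only):

  ```python
  # learning_rate * 0.5 * (1 + cos(pi * step / 1000))
  # step    0 -> 1.0 * learning_rate
  # step  500 -> 0.5 * learning_rate
  # step 1000 -> 0.0 * learning_rate
  ```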

  Args:
    learning_rate: A scalar `float32` or `float64` Tensor or a Python number.
      The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation.
    decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
      Number of steps to decay over.
    alpha: A scalar `float32` or `float64` Tensor or a Python number. Minimum
      learning rate value as a fraction of learning_rate.
    name: String. Optional name of the operation. Defaults to 'CosineDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  References:
    Stochastic Gradient Descent with Warm Restarts:
      [Loshchilov et al., 2017]
      (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx)
      ([pdf](https://openreview.net/pdf?id=Skq89Scxx))

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
  """
  decayed_lr = learning_rate_schedule.CosineDecay(
      learning_rate, decay_steps, alpha=alpha, name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr


@tf_export(v1=["train.cosine_decay_restarts"])
def cosine_decay_restarts(learning_rate,
                          global_step,
                          first_decay_steps,
                          t_mul=2.0,
                          m_mul=1.0,
                          alpha=0.0,
                          name=None):
  """Applies cosine decay with restarts to the learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies a cosine decay function with
  restarts to a provided initial learning rate. It requires a `global_step`
  value to compute the decayed learning rate. You can just pass a TensorFlow
  variable that you increment at each training step.

  The function returns the decayed learning rate while taking into account
  possible warm restarts. The learning rate multiplier first decays
  from 1 to `alpha` for `first_decay_steps` steps. Then, a warm
  restart is performed. Each new warm restart runs for `t_mul` times more steps
  and with `m_mul` times smaller initial learning rate.

  Example usage:

  ```python
  first_decay_steps = 1000
  lr_decayed = cosine_decay_restarts(learning_rate, global_step,
                                     first_decay_steps)
  ```
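
  With the defaults `t_mul=2.0` and `m_mul=1.0`, the sketch above restarts at
  steps 1000, 3000, 7000, and so on (an illustration only):

  ```python
  # Period lengths: 1000, 2000, 4000, ... (each t_mul times the previous one).
  # With m_mul=1.0 every restart begins again at the full learning_rate; with
  # m_mul < 1.0 each restart would begin at m_mul times the previous peak.
  ```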

  Args:
    learning_rate: A scalar `float32` or `float64` Tensor or a Python number.
      The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation.
    first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
      Number of steps to decay over.
    t_mul: A scalar `float32` or `float64` `Tensor` or a Python number. Used to
      derive the number of iterations in the i-th period.
    m_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
      Used to derive the initial learning rate of the i-th period.
    alpha: A scalar `float32` or `float64` Tensor or a Python number. Minimum
      learning rate value as a fraction of the learning_rate.
    name: String. Optional name of the operation. Defaults to 'SGDRDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  References:
    Stochastic Gradient Descent with Warm Restarts:
      [Loshchilov et al., 2017]
      (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx)
      ([pdf](https://openreview.net/pdf?id=Skq89Scxx))

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
  """
  decayed_lr = learning_rate_schedule.CosineDecayRestarts(
      learning_rate,
      first_decay_steps,
      t_mul=t_mul,
      m_mul=m_mul,
      alpha=alpha,
      name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr


@tf_export(v1=["train.linear_cosine_decay"])
def linear_cosine_decay(learning_rate,
                        global_step,
                        decay_steps,
                        num_periods=0.5,
                        alpha=0.0,
                        beta=0.001,
                        name=None):
  """Applies linear cosine decay to the learning rate.

  Note that linear cosine decay is more aggressive than cosine decay and
  larger initial learning rates can typically be used.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies a linear cosine decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  global_step = min(global_step, decay_steps)
  linear_decay = (decay_steps - global_step) / decay_steps
  cosine_decay = 0.5 * (
      1 + cos(pi * 2 * num_periods * global_step / decay_steps))
  decayed = (alpha + linear_decay) * cosine_decay + beta
  decayed_learning_rate = learning_rate * decayed
  ```

  Example usage:

  ```python
  decay_steps = 1000
  lr_decayed = linear_cosine_decay(learning_rate, global_step, decay_steps)
  ```
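
  A rough hand-worked check of the defaults (`num_periods=0.5`, `alpha=0.0`,
  `beta=0.001`) with `decay_steps=1000`, where `2 * num_periods = 1` (an
  illustration only):

  ```python
  # multiplier = ((1000 - step) / 1000) * 0.5 * (1 + cos(pi * step / 1000))
  #              + 0.001
  # step    0 -> 1.001
  # step  500 -> 0.251
  # step 1000 -> 0.001
  ```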

  Args:
    learning_rate: A scalar `float32` or `float64` Tensor or a Python number.
      The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation.
    decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
      Number of steps to decay over.
    num_periods: Number of periods in the cosine part of the decay. See
      computation above.
    alpha: See computation above.
    beta: See computation above.
    name: String. Optional name of the operation. Defaults to
      'LinearCosineDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  References:
    Neural Optimizer Search with Reinforcement Learning:
      [Bello et al., 2017](http://proceedings.mlr.press/v70/bello17a.html)
      ([pdf](http://proceedings.mlr.press/v70/bello17a/bello17a.pdf))
    Stochastic Gradient Descent with Warm Restarts:
      [Loshchilov et al., 2017]
      (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx)
      ([pdf](https://openreview.net/pdf?id=Skq89Scxx))

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
  """
  decayed_lr = learning_rate_schedule.LinearCosineDecay(
      learning_rate,
      decay_steps,
      num_periods=num_periods,
      alpha=alpha,
      beta=beta,
      name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr


@tf_export(v1=["train.noisy_linear_cosine_decay"])
def noisy_linear_cosine_decay(learning_rate,
                              global_step,
                              decay_steps,
                              initial_variance=1.0,
                              variance_decay=0.55,
                              num_periods=0.5,
                              alpha=0.0,
                              beta=0.001,
                              name=None):
  """Applies noisy linear cosine decay to the learning rate.

  Note that linear cosine decay is more aggressive than cosine decay and
  larger initial learning rates can typically be used.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies a noisy linear
  cosine decay function to a provided initial learning rate.
  It requires a `global_step` value to compute the decayed learning rate.
  You can just pass a TensorFlow variable that you increment at each
  training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  global_step = min(global_step, decay_steps)
  linear_decay = (decay_steps - global_step) / decay_steps
  cosine_decay = 0.5 * (
      1 + cos(pi * 2 * num_periods * global_step / decay_steps))
  decayed = (alpha + linear_decay + eps_t) * cosine_decay + beta
  decayed_learning_rate = learning_rate * decayed
  ```

  where `eps_t` is 0-centered Gaussian noise with variance
  `initial_variance / (1 + global_step) ** variance_decay`.

  Example usage:

  ```python
  decay_steps = 1000
  lr_decayed = noisy_linear_cosine_decay(
      learning_rate, global_step, decay_steps)
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` Tensor or a Python number.
      The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation.
    decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
      Number of steps to decay over.
    initial_variance: initial variance for the noise. See computation above.
    variance_decay: decay for the noise's variance. See computation above.
    num_periods: Number of periods in the cosine part of the decay. See
      computation above.
    alpha: See computation above.
    beta: See computation above.
    name: String. Optional name of the operation. Defaults to
      'NoisyLinearCosineDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  References:
    Neural Optimizer Search with Reinforcement Learning:
      [Bello et al., 2017](http://proceedings.mlr.press/v70/bello17a.html)
      ([pdf](http://proceedings.mlr.press/v70/bello17a/bello17a.pdf))
    Stochastic Gradient Descent with Warm Restarts:
      [Loshchilov et al., 2017]
      (https://openreview.net/forum?id=Skq89Scxx&noteId=Skq89Scxx)
      ([pdf](https://openreview.net/pdf?id=Skq89Scxx))

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for
  changing the learning rate value across different invocations of optimizer
  functions.
  @end_compatibility
  """
  decayed_lr = learning_rate_schedule.NoisyLinearCosineDecay(
      learning_rate,
      decay_steps,
      initial_variance=initial_variance,
      variance_decay=variance_decay,
      num_periods=num_periods,
      alpha=alpha,
      beta=beta,
      name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr