Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/activations.py: 55%
124 statements
coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Built-in activation functions."""

import sys
import types

import tensorflow.compat.v2 as tf

import keras.src.layers.activation as activation_layers
from keras.src import backend
from keras.src.saving import object_registration
from keras.src.saving import serialization_lib
from keras.src.saving.legacy import serialization as legacy_serialization
from keras.src.saving.legacy.saved_model import utils as saved_model_utils
from keras.src.utils import generic_utils

# isort: off
from tensorflow.python.util.tf_export import keras_export

# b/123041942
# In TF 2.x, if `tf.nn.softmax` is used as an activation function in Keras
# layers, it gets serialized as 'softmax_v2' instead of 'softmax', because the
# internal method name is returned during serialization. This results in
# errors when exporting and loading models, as Keras can't find any activation
# function with the name `softmax_v2`.
# This dict maps the activation function name from its v2 version to its
# canonical name.
_TF_ACTIVATIONS_V2 = {
    "softmax_v2": "softmax",
}
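

# Editor's illustration (not part of the upstream module): a minimal sketch of
# the remapping above. `_demo_v2_name_remap` is a hypothetical helper; it uses
# `serialize`, which is defined further down in this module, so the name is
# only looked up when the helper is called.
def _demo_v2_name_remap():
    # `tf.nn.softmax` is implemented internally as `softmax_v2`, so without
    # the mapping it would serialize under that internal name.
    return serialize(tf.nn.softmax)  # expected: 'softmax', not 'softmax_v2'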


@keras_export("keras.activations.softmax")
@tf.__internal__.dispatch.add_dispatch_support
def softmax(x, axis=-1):
    """Softmax converts a vector of values to a probability distribution.

    The elements of the output vector are in range (0, 1) and sum to 1.

    Each vector is handled independently. The `axis` argument sets which axis
    of the input the function is applied along.

    Softmax is often used as the activation for the last layer of a
    classification network because the result could be interpreted as a
    probability distribution.

    The softmax of each vector x is computed as
    `exp(x) / tf.reduce_sum(exp(x))`.

    The input values are the log-odds of the resulting probability.

    Args:
        x: Input tensor.
        axis: Integer, axis along which the softmax normalization is applied.

    Returns:
        Tensor, output of softmax transformation (all values are non-negative
        and sum to 1).

    Examples:

    **Example 1: standalone usage**

    >>> inputs = tf.random.normal(shape=(32, 10))
    >>> outputs = tf.keras.activations.softmax(inputs)
    >>> tf.reduce_sum(outputs[0, :])  # Each sample in the batch now sums to 1
    <tf.Tensor: shape=(), dtype=float32, numpy=1.0000001>

    **Example 2: usage in a `Dense` layer**

    >>> layer = tf.keras.layers.Dense(32,
    ...     activation=tf.keras.activations.softmax)
    """
    if x.shape.rank <= 1:
        raise ValueError(
            f"Cannot apply softmax to a tensor that is 1D. Received input: {x}"
        )

    if isinstance(axis, int):
        output = tf.nn.softmax(x, axis=axis)
    else:
        # nn.softmax does not support tuple axis.
        numerator = tf.exp(x - tf.reduce_max(x, axis=axis, keepdims=True))
        denominator = tf.reduce_sum(numerator, axis=axis, keepdims=True)
        output = numerator / denominator

    # Cache the logits to use for crossentropy loss.
    output._keras_logits = x
    return output
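

# Editor's illustration (not part of the upstream module): a minimal sketch of
# the tuple-axis branch above, which the docstring does not show.
# `_demo_softmax_tuple_axis` is a hypothetical helper.
def _demo_softmax_tuple_axis():
    x = tf.random.normal(shape=(2, 3, 4))
    out = softmax(x, axis=(1, 2))
    # Each 3x4 slice is normalized jointly, so each sum is ~1.
    return tf.reduce_sum(out, axis=(1, 2))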


@keras_export("keras.activations.elu")
@tf.__internal__.dispatch.add_dispatch_support
def elu(x, alpha=1.0):
    """Exponential Linear Unit.

    The exponential linear unit (ELU) with `alpha > 0` is:
    `x` if `x > 0` and
    `alpha * (exp(x) - 1)` if `x < 0`
    The ELU hyperparameter `alpha` controls the value to which an
    ELU saturates for negative net inputs. ELUs diminish the
    vanishing gradient effect.

    ELUs have negative values, which pushes the mean of the activations
    closer to zero.
    Mean activations that are closer to zero enable faster learning as they
    bring the gradient closer to the natural gradient.
    ELUs saturate to a negative value when the argument gets smaller.
    Saturation means a small derivative which decreases the variation
    and the information that is propagated to the next layer.

    Example Usage:

    >>> import tensorflow as tf
    >>> model = tf.keras.Sequential()
    >>> model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='elu',
    ...     input_shape=(28, 28, 1)))
    >>> model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    >>> model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='elu'))
    >>> model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    >>> model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='elu'))

    Args:
        x: Input tensor.
        alpha: A scalar, slope of negative section. `alpha` controls the value
            to which an ELU saturates for negative net inputs.

    Returns:
        The exponential linear unit (ELU) activation function: `x` if `x > 0`
        and `alpha * (exp(x) - 1)` if `x < 0`.

    Reference:
        - [Fast and Accurate Deep Network Learning by Exponential Linear Units
          (ELUs) (Clevert et al., 2016)](https://arxiv.org/abs/1511.07289)
    """
    return backend.elu(x, alpha)
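

# Editor's illustration (not part of the upstream module): a minimal sketch of
# the saturation behaviour described in the docstring; `_demo_elu_saturation`
# is a hypothetical helper.
def _demo_elu_saturation():
    x = tf.constant([-100.0, -1.0, 0.0, 1.0])
    # For very negative inputs the output approaches -alpha (here, -1.0).
    return elu(x, alpha=1.0)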


@keras_export("keras.activations.selu")
@tf.__internal__.dispatch.add_dispatch_support
def selu(x):
    """Scaled Exponential Linear Unit (SELU).

    The Scaled Exponential Linear Unit (SELU) activation function is defined
    as:

    - `if x > 0: return scale * x`
    - `if x < 0: return scale * alpha * (exp(x) - 1)`

    where `alpha` and `scale` are pre-defined constants
    (`alpha=1.67326324` and `scale=1.05070098`).

    Basically, the SELU activation function multiplies `scale` (> 1) with the
    output of the `tf.keras.activations.elu` function to ensure a slope larger
    than one for positive inputs.

    The values of `alpha` and `scale` are
    chosen so that the mean and variance of the inputs are preserved
    between two consecutive layers as long as the weights are initialized
    correctly (see `tf.keras.initializers.LecunNormal` initializer)
    and the number of input units is "large enough"
    (see reference paper for more information).

    Example Usage:

    >>> num_classes = 10  # 10-class problem
    >>> model = tf.keras.Sequential()
    >>> model.add(tf.keras.layers.Dense(64, kernel_initializer='lecun_normal',
    ...     activation='selu'))
    >>> model.add(tf.keras.layers.Dense(32, kernel_initializer='lecun_normal',
    ...     activation='selu'))
    >>> model.add(tf.keras.layers.Dense(16, kernel_initializer='lecun_normal',
    ...     activation='selu'))
    >>> model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

    Args:
        x: A tensor or variable to compute the activation function for.

    Returns:
        The scaled exponential unit activation: `scale * elu(x, alpha)`.

    Notes:
        - To be used together with the
          `tf.keras.initializers.LecunNormal` initializer.
        - To be used together with the dropout variant
          `tf.keras.layers.AlphaDropout` (not regular dropout).

    References:
        - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
    """
    return tf.nn.selu(x)
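

# Editor's illustration (not part of the upstream module): a minimal sketch
# checking the docstring identity `selu(x) == scale * elu(x, alpha)` with the
# published constants; `_demo_selu_identity` is a hypothetical helper.
def _demo_selu_identity():
    alpha, scale = 1.67326324, 1.05070098
    x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])
    # The two results agree up to floating-point rounding.
    return selu(x), scale * elu(x, alpha)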


@keras_export("keras.activations.softplus")
@tf.__internal__.dispatch.add_dispatch_support
def softplus(x):
    """Softplus activation function, `softplus(x) = log(exp(x) + 1)`.

    Example Usage:

    >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype=tf.float32)
    >>> b = tf.keras.activations.softplus(a)
    >>> b.numpy()
    array([2.0611537e-09, 3.1326166e-01, 6.9314718e-01, 1.3132616e+00,
           2.0000000e+01], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        The softplus activation: `log(exp(x) + 1)`.
    """
    return tf.math.softplus(x)


@keras_export("keras.activations.softsign")
@tf.__internal__.dispatch.add_dispatch_support
def softsign(x):
    """Softsign activation function, `softsign(x) = x / (abs(x) + 1)`.

    Example Usage:

    >>> a = tf.constant([-1.0, 0.0, 1.0], dtype=tf.float32)
    >>> b = tf.keras.activations.softsign(a)
    >>> b.numpy()
    array([-0.5,  0. ,  0.5], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        The softsign activation: `x / (abs(x) + 1)`.
    """
    return tf.math.softsign(x)


@keras_export("keras.activations.swish")
@tf.__internal__.dispatch.add_dispatch_support
def swish(x):
    """Swish activation function, `swish(x) = x * sigmoid(x)`.

    Swish is a smooth, non-monotonic function that consistently matches
    or outperforms ReLU on deep networks. It is unbounded above and
    bounded below.

    Example Usage:

    >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype=tf.float32)
    >>> b = tf.keras.activations.swish(a)
    >>> b.numpy()
    array([-4.1223075e-08, -2.6894143e-01,  0.0000000e+00,  7.3105860e-01,
            2.0000000e+01], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        The swish activation applied to `x` (see reference paper for details).

    Reference:
        - [Ramachandran et al., 2017](https://arxiv.org/abs/1710.05941)
    """
    return tf.nn.silu(x)


@keras_export("keras.activations.relu")
@tf.__internal__.dispatch.add_dispatch_support
def relu(x, alpha=0.0, max_value=None, threshold=0.0):
    """Applies the rectified linear unit activation function.

    With default values, this returns the standard ReLU activation:
    `max(x, 0)`, the element-wise maximum of 0 and the input tensor.

    Modifying default parameters allows you to use non-zero thresholds,
    change the max value of the activation,
    and to use a non-zero multiple of the input for values below the threshold.

    Example:

    >>> foo = tf.constant([-10, -5, 0.0, 5, 10], dtype=tf.float32)
    >>> tf.keras.activations.relu(foo).numpy()
    array([ 0.,  0.,  0.,  5., 10.], dtype=float32)
    >>> tf.keras.activations.relu(foo, alpha=0.5).numpy()
    array([-5. , -2.5,  0. ,  5. , 10. ], dtype=float32)
    >>> tf.keras.activations.relu(foo, max_value=5.).numpy()
    array([0., 0., 0., 5., 5.], dtype=float32)
    >>> tf.keras.activations.relu(foo, threshold=5.).numpy()
    array([-0., -0.,  0.,  0., 10.], dtype=float32)

    Args:
        x: Input `tensor` or `variable`.
        alpha: A `float` that governs the slope for values lower than the
            threshold.
        max_value: A `float` that sets the saturation threshold (the largest
            value the function will return).
        threshold: A `float` giving the threshold value of the activation
            function below which values will be damped or set to zero.

    Returns:
        A `Tensor` representing the input tensor,
        transformed by the relu activation function.
        Tensor will be of the same shape and dtype of input `x`.
    """
    return backend.relu(
        x, alpha=alpha, max_value=max_value, threshold=threshold
    )
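

# Editor's illustration (not part of the upstream module): a minimal sketch
# combining the three parameters, which the docstring only shows one at a
# time; `_demo_relu_params` is a hypothetical helper.
def _demo_relu_params():
    foo = tf.constant([-10.0, -5.0, 0.0, 5.0, 10.0])
    # Below `threshold`, outputs follow `alpha * (x - threshold)`; above it,
    # outputs follow `x` but are capped at `max_value`.
    return relu(foo, alpha=0.1, max_value=6.0, threshold=1.0)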


@keras_export("keras.activations.gelu", v1=[])
@tf.__internal__.dispatch.add_dispatch_support
def gelu(x, approximate=False):
    """Applies the Gaussian error linear unit (GELU) activation function.

    Gaussian error linear unit (GELU) computes
    `x * P(X <= x)`, where `P(X) ~ N(0, 1)`.
    The (GELU) nonlinearity weights inputs by their value, rather than gates
    inputs by their sign as in ReLU.

    Example:

    >>> x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32)
    >>> y = tf.keras.activations.gelu(x)
    >>> y.numpy()
    array([-0.00404951, -0.15865529,  0.        ,  0.8413447 ,  2.9959507 ],
        dtype=float32)
    >>> y = tf.keras.activations.gelu(x, approximate=True)
    >>> y.numpy()
    array([-0.00363752, -0.15880796,  0.        ,  0.841192  ,  2.9963627 ],
        dtype=float32)

    Args:
        x: Input tensor.
        approximate: A `bool`, whether to enable approximation.

    Returns:
        The gaussian error linear activation:
        `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))`
        if `approximate` is `True` or
        `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`,
        where `P(X) ~ N(0, 1)`,
        if `approximate` is `False`.

    Reference:
        - [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415)
    """
    return tf.nn.gelu(x, approximate)
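

# Editor's illustration (not part of the upstream module): a minimal sketch
# evaluating the tanh-based approximation from the docstring next to
# `gelu(..., approximate=True)`; `_demo_gelu_approximation` is a hypothetical
# helper.
def _demo_gelu_approximation():
    import math

    x = tf.constant([-1.0, 0.0, 1.0, 2.0])
    manual = (
        0.5
        * x
        * (1.0 + tf.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x**3)))
    )
    return manual, gelu(x, approximate=True)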


@keras_export("keras.activations.tanh")
@tf.__internal__.dispatch.add_dispatch_support
def tanh(x):
    """Hyperbolic tangent activation function.

    Example:

    >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32)
    >>> b = tf.keras.activations.tanh(a)
    >>> b.numpy()
    array([-0.9950547, -0.7615942,  0.,  0.7615942,  0.9950547], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        Tensor of same shape and dtype of input `x`, with tanh activation:
        `tanh(x) = sinh(x)/cosh(x) = ((exp(x) - exp(-x))/(exp(x) + exp(-x)))`.
    """
    return tf.tanh(x)


@keras_export("keras.activations.sigmoid")
@tf.__internal__.dispatch.add_dispatch_support
def sigmoid(x):
    """Sigmoid activation function, `sigmoid(x) = 1 / (1 + exp(-x))`.

    Applies the sigmoid activation function. For small values (<-5),
    `sigmoid` returns a value close to zero, and for large values (>5)
    the result of the function gets close to 1.

    Sigmoid is equivalent to a 2-element Softmax, where the second element is
    assumed to be zero. The sigmoid function always returns a value between
    0 and 1.

    Example:

    >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype=tf.float32)
    >>> b = tf.keras.activations.sigmoid(a)
    >>> b.numpy()
    array([2.0611537e-09, 2.6894143e-01, 5.0000000e-01, 7.3105860e-01,
           1.0000000e+00], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        Tensor with the sigmoid activation: `1 / (1 + exp(-x))`.
    """
    output = tf.sigmoid(x)
    # Cache the logits to use for crossentropy loss.
    output._keras_logits = x
    return output
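

# Editor's illustration (not part of the upstream module): a minimal sketch of
# the `_keras_logits` caching above, which lets cross-entropy losses recover
# the raw logits for a numerically stable path; `_demo_sigmoid_logit_cache` is
# a hypothetical helper.
def _demo_sigmoid_logit_cache():
    logits = tf.constant([-2.0, 0.0, 2.0])
    probs = sigmoid(logits)
    return probs._keras_logits is logits  # expected: True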


@keras_export("keras.activations.exponential")
@tf.__internal__.dispatch.add_dispatch_support
def exponential(x):
    """Exponential activation function.

    Example:

    >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32)
    >>> b = tf.keras.activations.exponential(a)
    >>> b.numpy()
    array([0.04978707, 0.36787945, 1., 2.7182817, 20.085537], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        Tensor with exponential activation: `exp(x)`.
    """
    return tf.exp(x)


@keras_export("keras.activations.hard_sigmoid")
@tf.__internal__.dispatch.add_dispatch_support
def hard_sigmoid(x):
    """Hard sigmoid activation function.

    A faster, piecewise linear approximation of the sigmoid activation.
    Ref: https://en.wikipedia.org/wiki/Hard_sigmoid

    Example:

    >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32)
    >>> b = tf.keras.activations.hard_sigmoid(a)
    >>> b.numpy()
    array([0. , 0.3, 0.5, 0.7, 1. ], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        The hard sigmoid activation, defined as:

        - `if x < -2.5: return 0`
        - `if x > 2.5: return 1`
        - `if -2.5 <= x <= 2.5: return 0.2 * x + 0.5`
    """
    return backend.hard_sigmoid(x)
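

# Editor's illustration (not part of the upstream module): a minimal sketch of
# the piecewise definition above, written with `tf.clip_by_value` for
# comparison; `_demo_hard_sigmoid_piecewise` is a hypothetical helper.
def _demo_hard_sigmoid_piecewise():
    x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0])
    manual = tf.clip_by_value(0.2 * x + 0.5, 0.0, 1.0)
    return manual, hard_sigmoid(x)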


@keras_export("keras.activations.linear")
@tf.__internal__.dispatch.add_dispatch_support
def linear(x):
    """Linear activation function (pass-through).

    Example:

    >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32)
    >>> b = tf.keras.activations.linear(a)
    >>> b.numpy()
    array([-3., -1.,  0.,  1.,  3.], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        The input, unmodified.
    """
    return x


@keras_export("keras.activations.mish")
@tf.__internal__.dispatch.add_dispatch_support
def mish(x):
    """Mish activation function.

    It is defined as:

    ```python
    def mish(x):
        return x * tanh(softplus(x))
    ```

    where `softplus` is defined as:

    ```python
    def softplus(x):
        return log(exp(x) + 1)
    ```

    Example:

    >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0], dtype=tf.float32)
    >>> b = tf.keras.activations.mish(a)
    >>> b.numpy()
    array([-0.14564745, -0.30340144,  0.,  0.86509836], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        The mish activation.

    Reference:
        - [Mish: A Self Regularized Non-Monotonic
          Activation Function](https://arxiv.org/abs/1908.08681)
    """
    return x * tf.math.tanh(tf.math.softplus(x))


@keras_export("keras.activations.serialize")
@tf.__internal__.dispatch.add_dispatch_support
def serialize(activation, use_legacy_format=False):
    """Returns the string identifier of an activation function.

    Args:
        activation: Function object.
        use_legacy_format: Boolean, whether to serialize with the legacy
            Keras saving format.

    Returns:
        String denoting the name attribute of the input function.

    Example:

    >>> tf.keras.activations.serialize(tf.keras.activations.tanh)
    'tanh'
    >>> tf.keras.activations.serialize(tf.keras.activations.sigmoid)
    'sigmoid'
    >>> tf.keras.activations.serialize('abcd')
    Traceback (most recent call last):
    ...
    ValueError: Unknown activation function 'abcd' cannot be serialized.

    Raises:
        ValueError: The input function is not a valid one.
    """
    if (
        hasattr(activation, "__name__")
        and activation.__name__ in _TF_ACTIVATIONS_V2
    ):
        return _TF_ACTIVATIONS_V2[activation.__name__]

    if use_legacy_format:
        return legacy_serialization.serialize_keras_object(activation)

    fn_config = serialization_lib.serialize_keras_object(activation)
    if (
        not tf.__internal__.tf2.enabled()
        or saved_model_utils.in_tf_saved_model_scope()
    ):
        return fn_config
    if "config" not in fn_config:
        raise ValueError(
            f"Unknown activation function '{activation}' cannot be "
            "serialized due to invalid function name. Make sure to use "
            "an activation name that matches the references defined in "
            "activations.py or use "
            "`@keras.saving.register_keras_serializable()` "
            "to register any custom activations. "
            f"config={fn_config}"
        )
    if not isinstance(activation, types.FunctionType):
        # Case for additional custom activations represented by objects
        return fn_config
    if (
        isinstance(fn_config["config"], str)
        and fn_config["config"] not in globals()
    ):
        # Case for custom activation functions from external activations
        # modules
        fn_config["config"] = object_registration.get_registered_name(
            activation
        )
        return fn_config
    # Case for keras.activations builtins (simply return name)
    return fn_config["config"]
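

# Editor's illustration (not part of the upstream module): a minimal sketch of
# the built-in case above. Under TF2 defaults, a built-in activation
# serializes to its bare name, which `deserialize` below resolves again.
# `_demo_serialize_builtin` is a hypothetical helper.
def _demo_serialize_builtin():
    name = serialize(selu)  # expected: 'selu'
    return deserialize(name)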


# Add additional globals so that deserialize() can find these common
# activation functions.
leaky_relu = tf.nn.leaky_relu
log_softmax = tf.nn.log_softmax
relu6 = tf.nn.relu6
silu = tf.nn.silu


@keras_export("keras.activations.deserialize")
@tf.__internal__.dispatch.add_dispatch_support
def deserialize(name, custom_objects=None, use_legacy_format=False):
    """Returns activation function given a string identifier.

    Args:
        name: The name of the activation function.
        custom_objects: Optional `{function_name: function_obj}`
            dictionary listing user-provided activation functions.
        use_legacy_format: Boolean, whether to deserialize with the legacy
            Keras saving format.

    Returns:
        Corresponding activation function.

    Example:

    >>> tf.keras.activations.deserialize('linear')
    <function linear at 0x1239596a8>
    >>> tf.keras.activations.deserialize('sigmoid')
    <function sigmoid at 0x123959510>
    >>> tf.keras.activations.deserialize('abcd')
    Traceback (most recent call last):
    ...
    ValueError: Unknown activation function 'abcd' cannot be deserialized.

    Raises:
        ValueError: `Unknown activation function` if the input string does not
            denote any defined TensorFlow activation function.
    """
    activation_functions = {}
    current_module = sys.modules[__name__]

    # We put 'current_module' after 'activation_layers' to prefer the local
    # one if there is a collision.
    generic_utils.populate_dict_with_module_objects(
        activation_functions,
        (activation_layers, current_module),
        obj_filter=callable,
    )

    if use_legacy_format:
        return legacy_serialization.deserialize_keras_object(
            name,
            module_objects=activation_functions,
            custom_objects=custom_objects,
            printable_module_name="activation function",
        )

    returned_fn = serialization_lib.deserialize_keras_object(
        name,
        module_objects=activation_functions,
        custom_objects=custom_objects,
        printable_module_name="activation function",
    )

    if isinstance(returned_fn, str):
        raise ValueError(
            f"Unknown activation function '{name}' cannot be deserialized."
        )

    return returned_fn
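

# Editor's illustration (not part of the upstream module): a minimal sketch of
# the `custom_objects` argument, assuming plain-string identifiers are looked
# up there just like built-in names; `_demo_deserialize_custom` and
# `scaled_tanh` are hypothetical.
def _demo_deserialize_custom():
    def scaled_tanh(x):
        return 2.0 * tf.tanh(x)

    return deserialize(
        "scaled_tanh", custom_objects={"scaled_tanh": scaled_tanh}
    )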


@keras_export("keras.activations.get")
@tf.__internal__.dispatch.add_dispatch_support
def get(identifier):
    """Returns the activation function corresponding to `identifier`.

    Args:
        identifier: Function object, string name of an activation function,
            serialized config dict, or None.

    Returns:
        Function corresponding to the input string or input function.

    Example:

    >>> tf.keras.activations.get('softmax')
    <function softmax at 0x1222a3d90>
    >>> tf.keras.activations.get(tf.keras.activations.softmax)
    <function softmax at 0x1222a3d90>
    >>> tf.keras.activations.get(None)
    <function linear at 0x1239596a8>
    >>> tf.keras.activations.get(abs)
    <built-in function abs>
    >>> tf.keras.activations.get('abcd')
    Traceback (most recent call last):
    ...
    ValueError: Unknown activation function 'abcd' cannot be deserialized.

    Raises:
        ValueError: Input is an unknown function or string, i.e., the input
            does not denote any defined function.
    """
    if identifier is None:
        return linear
    if isinstance(identifier, (str, dict)):
        use_legacy_format = (
            "module" not in identifier
            if isinstance(identifier, dict)
            else False
        )
        return deserialize(identifier, use_legacy_format=use_legacy_format)
    elif callable(identifier):
        return identifier
    raise TypeError(
        f"Could not interpret activation function identifier: {identifier}"
    )
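

# Editor's illustration (not part of the upstream module): a minimal sketch of
# the identifier kinds `get` accepts; `_demo_get_identifiers` is a
# hypothetical helper.
def _demo_get_identifiers():
    by_name = get("swish")  # string name -> built-in function
    by_none = get(None)  # None -> the `linear` pass-through
    by_callable = get(tf.nn.relu6)  # callables are returned unchanged
    return by_name, by_none, by_callable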