Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/initializers/initializers.py: 30%
316 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Keras initializers."""
17import math
18import warnings
20import tensorflow.compat.v2 as tf
22from keras.src import backend
23from keras.src.dtensor import utils
24from keras.src.saving import serialization_lib
26# isort: off
27from tensorflow.python.util.tf_export import keras_export
# Keyword-argument names that initializer `__call__` methods recognize:
# partitioned-variable bookkeeping and DTensor layout placement.
_PARTITION_SHAPE = "partition_shape"
_PARTITION_OFFSET = "partition_offset"
_LAYOUT = "layout"
# The full set of extra kwargs an initializer call may receive.
_ALLOWED_INITIALIZER_KWARGS = [_PARTITION_SHAPE, _PARTITION_OFFSET, _LAYOUT]
@keras_export("keras.initializers.Initializer")
class Initializer:
    """Initializer base class: all Keras initializers inherit from this class.

    Subclasses must implement a `__call__()` method with the signature:

    ```python
    def __call__(self, shape, dtype=None, **kwargs):
        # returns a tensor of shape `shape` and dtype `dtype`
        # containing values drawn from a distribution of your choice.
        return tf.random.uniform(shape=shape, dtype=dtype)
    ```

    Optionally, you can also implement the method `get_config()` and the
    class method `from_config()` in order to support serialization -- just
    like with any Keras object.

    Here's a simple example: a random normal initializer.

    ```python
    class ExampleRandomNormal(Initializer):
        def __init__(self, mean, stddev):
            self.mean = mean
            self.stddev = stddev

        def __call__(self, shape, dtype=None, **kwargs):
            return tf.random.normal(
                shape, mean=self.mean, stddev=self.stddev, dtype=dtype
            )

        def get_config(self):  # To support serialization
            return {"mean": self.mean, "stddev": self.stddev}
    ```

    Note that `from_config()` does not need to be overridden when, as in the
    example above, the constructor arguments and the keys in the config
    returned by `get_config()` are the same. In that case, the default
    `from_config()` works fine.
    """

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor.
            **kwargs: Additional keyword arguments.
        """
        raise NotImplementedError(
            "Initializer subclasses must implement the `__call__()` method."
        )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict.

        Returns:
            A JSON-serializable Python dict.
        """
        return {}

    @classmethod
    def from_config(cls, config):
        """Instantiates an initializer from a configuration dictionary.

        Example:

        ```python
        initializer = RandomUniform(-1, 1)
        config = initializer.get_config()
        initializer = RandomUniform.from_config(config)
        ```

        Args:
            config: A Python dictionary, the output of `get_config()`.

        Returns:
            An `Initializer` instance.
        """
        # Legacy configs may carry a `dtype` key that is not a constructor
        # argument; drop it before instantiating.
        config.pop("dtype", None)
        return cls(**config)

    def _warn_reuse(self):
        # Warn only from the second call onwards of an unseeded instance,
        # since reusing it then yields identical values on every call.
        if not getattr(self, "_used", False):
            self._used = True
        elif getattr(self, "seed", None) is None:
            warnings.warn(
                f"The initializer {self.__class__.__name__} is unseeded "
                "and being called multiple times, which will return "
                "identical values each time (even if the initializer is "
                "unseeded). Please update your code to provide a seed to "
                "the initializer, or avoid using the same initializer "
                "instance more than once."
            )
@keras_export("keras.initializers.Zeros", "keras.initializers.zeros", v1=[])
class Zeros(Initializer):
    """Initializer that generates tensors initialized to 0.

    Also available via the shortcut function `tf.keras.initializers.zeros`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Zeros()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Zeros()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
    """

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only numeric or boolean dtypes
                are supported. If not specified, `keras.backend.floatx()` is
                used, which defaults to `float32` unless you configured it
                otherwise (via `keras.backend.set_floatx(float_dtype)`).
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _get_dtype(dtype)
        if dtype == tf.string or not dtype.is_numpy_compatible:
            raise ValueError(f"Expected numeric or boolean dtype, got {dtype}.")
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        layout = kwargs.pop("layout", None)
        if not layout:
            return tf.zeros(shape, dtype)
        return utils.call_with_layout(tf.zeros, layout, shape=shape, dtype=dtype)
@keras_export("keras.initializers.Ones", "keras.initializers.ones", v1=[])
class Ones(Initializer):
    """Initializer that generates tensors initialized to 1.

    Also available via the shortcut function `tf.keras.initializers.ones`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Ones()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Ones()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
    """

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only numeric or boolean dtypes
                are supported. If not specified, `keras.backend.floatx()` is
                used, which defaults to `float32` unless you configured it
                otherwise (via `keras.backend.set_floatx(float_dtype)`).
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _get_dtype(dtype)
        if dtype == tf.string or not dtype.is_numpy_compatible:
            raise ValueError(f"Expected numeric or boolean dtype, got {dtype}.")
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        layout = kwargs.pop("layout", None)
        if not layout:
            return tf.ones(shape, dtype)
        return utils.call_with_layout(tf.ones, layout, shape=shape, dtype=dtype)
@keras_export(
    "keras.initializers.Constant", "keras.initializers.constant", v1=[]
)
class Constant(Initializer):
    """Initializer that generates tensors with constant values.

    Also available via the shortcut function `tf.keras.initializers.constant`.

    Only scalar values are allowed.
    The constant value provided must be convertible to the dtype requested
    when calling the initializer.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Constant(3.)
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Constant(3.)
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        value: A Python scalar.
    """

    def __init__(self, value=0):
        self.value = value

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to `self.value`.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. If not specified,
                `keras.backend.floatx()` is used,
                which defaults to `float32` unless you configured it
                otherwise (via `keras.backend.set_floatx(float_dtype)`).
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _get_dtype(dtype)
        if _PARTITION_SHAPE in kwargs:
            # A partitioned variable supplies the shape of its own shard.
            shape = kwargs[_PARTITION_SHAPE]
        layout = kwargs.pop("layout", None)
        if layout:
            return utils.call_with_layout(
                tf.constant, layout, self.value, shape=shape, dtype=dtype
            )
        # `dtype` was already normalized above; do not recompute it here.
        return tf.constant(self.value, dtype=dtype, shape=shape)

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return {"value": self.value}

    @classmethod
    def from_config(cls, config):
        """Instantiates the initializer, deserializing a wrapped `value`.

        Args:
            config: A Python dictionary, the output of `get_config()`.

        Returns:
            A `Constant` instance.
        """
        config.pop("dtype", None)
        if "value" in config:
            # `value` may arrive as a serialized Keras object (e.g. from a
            # saved model config); restore it before constructing.
            if isinstance(config["value"], dict):
                config["value"] = serialization_lib.deserialize_keras_object(
                    config["value"]
                )
        return cls(**config)
@keras_export(
    "keras.initializers.RandomUniform",
    "keras.initializers.random_uniform",
    v1=[],
)
class RandomUniform(Initializer):
    """Initializer that generates tensors with a uniform distribution.

    Also available via the shortcut function
    `tf.keras.initializers.random_uniform`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.RandomUniform(minval=0., maxval=1.)
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.RandomUniform(minval=0., maxval=1.)
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        minval: A python scalar or a scalar tensor. Lower bound of the range of
            random values to generate (inclusive).
        maxval: A python scalar or a scalar tensor. Upper bound of the range of
            random values to generate (exclusive).
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.
    """

    def __init__(self, minval=-0.05, maxval=0.05, seed=None):
        self.minval = minval
        self.maxval = maxval
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point and integer
                types are supported. If not specified,
                `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise
                (via `tf.keras.backend.set_floatx(float_dtype)`).
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _get_dtype(dtype)
        if not (dtype.is_floating or dtype.is_integer):
            raise ValueError(f"Expected float or integer dtype, got {dtype}.")
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        partition_offset = kwargs.get(_PARTITION_OFFSET, None)
        if partition_offset is None:
            # Partitioned variables invoke the same initializer once per
            # partition, so reuse there is expected and not warned about.
            self._warn_reuse()
        # The offset salts the stateless RNG so each shard gets distinct values.
        nonce = hash(partition_offset) if partition_offset else None
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._random_generator.random_uniform(
                shape, self.minval, self.maxval, dtype, nonce
            )
        _ensure_keras_seeded()
        return utils.call_with_layout(
            self._random_generator.random_uniform,
            layout,
            shape,
            self.minval,
            self.maxval,
            dtype,
            nonce,
        )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(minval=self.minval, maxval=self.maxval, seed=self.seed)
@keras_export(
    "keras.initializers.RandomNormal", "keras.initializers.random_normal", v1=[]
)
class RandomNormal(Initializer):
    """Initializer that generates tensors with a normal distribution.

    Also available via the shortcut function
    `tf.keras.initializers.random_normal`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        mean: a python scalar or a scalar tensor. Mean of the random values to
            generate.
        stddev: a python scalar or a scalar tensor. Standard deviation of the
            random values to generate.
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.
    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
        self.mean = mean
        self.stddev = stddev
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to random normal values.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise (via
                `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _assert_float_dtype(_get_dtype(dtype))
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        partition_offset = kwargs.get(_PARTITION_OFFSET, None)
        if partition_offset is None:
            # Partitioned variables invoke the same initializer once per
            # partition, so reuse there is expected and not warned about.
            self._warn_reuse()
        # The offset salts the stateless RNG so each shard gets distinct values.
        nonce = hash(partition_offset) if partition_offset else None
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._random_generator.random_normal(
                shape, self.mean, self.stddev, dtype, nonce
            )
        _ensure_keras_seeded()
        return utils.call_with_layout(
            self._random_generator.random_normal,
            layout,
            shape,
            self.mean,
            self.stddev,
            dtype,
            nonce,
        )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(mean=self.mean, stddev=self.stddev, seed=self.seed)
@keras_export(
    "keras.initializers.TruncatedNormal",
    "keras.initializers.truncated_normal",
    v1=[],
)
class TruncatedNormal(Initializer):
    """Initializer that generates a truncated normal distribution.

    Also available via the shortcut function
    `tf.keras.initializers.truncated_normal`.

    The values generated are similar to values from a
    `tf.keras.initializers.RandomNormal` initializer except that values more
    than two standard deviations from the mean are
    discarded and re-drawn.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.TruncatedNormal(mean=0., stddev=1.)
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.TruncatedNormal(mean=0., stddev=1.)
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        mean: a python scalar or a scalar tensor. Mean of the random values
            to generate.
        stddev: a python scalar or a scalar tensor. Standard deviation of the
            random values to generate before truncation.
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.
    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
        self.mean = mean
        self.stddev = stddev
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor initialized to random normal values (truncated).

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise (via
                `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _assert_float_dtype(_get_dtype(dtype))
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        partition_offset = kwargs.get(_PARTITION_OFFSET, None)
        if partition_offset is None:
            # Partitioned variables invoke the same initializer once per
            # partition, so reuse there is expected and not warned about.
            self._warn_reuse()
        # The offset salts the stateless RNG so each shard gets distinct values.
        nonce = hash(partition_offset) if partition_offset else None
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._random_generator.truncated_normal(
                shape, self.mean, self.stddev, dtype, nonce
            )
        # TODO(scottzhu): Remove this once the forward compat period above
        # is expired.
        self._random_generator._rng_type = (
            self._random_generator.RNG_STATEFUL
        )
        _ensure_keras_seeded()
        return utils.call_with_layout(
            self._random_generator.truncated_normal,
            layout,
            shape,
            self.mean,
            self.stddev,
            dtype,
            nonce,
        )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(mean=self.mean, stddev=self.stddev, seed=self.seed)
@keras_export(
    "keras.initializers.VarianceScaling",
    "keras.initializers.variance_scaling",
    v1=[],
)
class VarianceScaling(Initializer):
    """Initializer that adapts its scale to the shape of its input tensors.

    Also available via the shortcut function
    `tf.keras.initializers.variance_scaling`.

    With `distribution="truncated_normal" or "untruncated_normal"`, samples are
    drawn from a truncated/untruncated normal distribution with a mean of zero
    and a standard deviation (after truncation, if used) `stddev = sqrt(scale /
    n)`, where `n` is:

    - number of input units in the weight tensor, if `mode="fan_in"`
    - number of output units, if `mode="fan_out"`
    - average of the numbers of input and output units, if `mode="fan_avg"`

    With `distribution="uniform"`, samples are drawn from a uniform distribution
    within `[-limit, limit]`, where `limit = sqrt(3 * scale / n)`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.VarianceScaling(
    ...     scale=0.1, mode='fan_in', distribution='uniform')
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.VarianceScaling(
    ...     scale=0.1, mode='fan_in', distribution='uniform')
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        scale: Scaling factor (positive float).
        mode: One of `"fan_in"`, `"fan_out"`, `"fan_avg"`.
        distribution: Random distribution to use. One of `"truncated_normal"`,
            `"untruncated_normal"`, or `"uniform"`.
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.
    """

    def __init__(
        self,
        scale=1.0,
        mode="fan_in",
        distribution="truncated_normal",
        seed=None,
    ):
        if scale <= 0.0:
            raise ValueError(
                f"`scale` must be positive float. Received: scale={scale}."
            )
        allowed_modes = {"fan_in", "fan_out", "fan_avg"}
        if mode not in allowed_modes:
            raise ValueError(
                f"Invalid `mode` argument: {mode}. "
                f"Please use one of the {allowed_modes}."
            )
        distribution = distribution.lower()
        # Compatibility with keras-team/keras.
        if distribution == "normal":
            distribution = "truncated_normal"
        allowed_distributions = {
            "uniform",
            "truncated_normal",
            "untruncated_normal",
        }
        if distribution not in allowed_distributions:
            # Note: the two fragments previously ran together with no space
            # ("...argument: uniform.Allowed..."); keep the separator.
            raise ValueError(
                f"Invalid `distribution` argument: {distribution}. "
                f"Allowed distributions: {allowed_distributions}."
            )
        self.scale = scale
        self.mode = mode
        self.distribution = distribution
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise (via
                `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _assert_float_dtype(_get_dtype(dtype))
        if _PARTITION_SHAPE in kwargs:
            # A partitioned variable supplies the shape of its own shard.
            shape = kwargs[_PARTITION_SHAPE]
        partition_offset = kwargs.get(_PARTITION_OFFSET, None)
        if partition_offset is None:
            # We skip the reuse warning for partitioned variable, since the same
            # initializer will be called multiple times for each partition.
            self._warn_reuse()
        # The offset salts the stateless RNG so each shard gets distinct values.
        nonce = hash(partition_offset) if partition_offset else None
        layout = kwargs.pop("layout", None)
        if layout:
            _ensure_keras_seeded()
            return utils.call_with_layout(
                self._generate_init_val,
                layout,
                shape=shape,
                dtype=dtype,
                nonce=nonce,
            )
        return self._generate_init_val(shape=shape, dtype=dtype, nonce=nonce)

    def _generate_init_val(self, shape, dtype, nonce):
        # Derive the effective scale from the fan of the weight tensor, then
        # draw from the configured distribution with matched variance.
        scale = self.scale
        fan_in, fan_out = _compute_fans(shape)
        if self.mode == "fan_in":
            scale /= max(1.0, fan_in)
        elif self.mode == "fan_out":
            scale /= max(1.0, fan_out)
        else:
            scale /= max(1.0, (fan_in + fan_out) / 2.0)
        if self.distribution == "truncated_normal":
            # constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0.,
            # scale=1.)
            stddev = math.sqrt(scale) / 0.87962566103423978
            return self._random_generator.truncated_normal(
                shape, 0.0, stddev, dtype, nonce
            )
        elif self.distribution == "untruncated_normal":
            stddev = math.sqrt(scale)
            return self._random_generator.random_normal(
                shape, 0.0, stddev, dtype, nonce
            )
        else:
            limit = math.sqrt(3.0 * scale)
            return self._random_generator.random_uniform(
                shape, -limit, limit, dtype, nonce
            )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return {
            "scale": self.scale,
            "mode": self.mode,
            "distribution": self.distribution,
            "seed": self.seed,
        }
@keras_export(
    "keras.initializers.Orthogonal", "keras.initializers.orthogonal", v1=[]
)
class Orthogonal(Initializer):
    """Initializer that generates an orthogonal matrix.

    Also available via the shortcut function `tf.keras.initializers.orthogonal`.

    If the shape of the tensor to initialize is two-dimensional, it is
    initialized with an orthogonal matrix obtained from the QR decomposition of
    a matrix of random numbers drawn from a normal distribution. If the matrix
    has fewer rows than columns then the output will have orthogonal rows.
    Otherwise, the output will have orthogonal columns.

    If the shape of the tensor to initialize is more than two-dimensional,
    a matrix of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])`
    is initialized, where `n` is the length of the shape vector.
    The matrix is subsequently reshaped to give a tensor of the desired shape.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Orthogonal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Orthogonal()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        gain: multiplicative factor to apply to the orthogonal matrix
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.

    References:
        - [Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C)
    """

    def __init__(self, gain=1.0, seed=None):
        self.gain = gain
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to an orthogonal matrix.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise
                (via `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(
            self.__class__.__name__, kwargs, support_partition=False
        )
        dtype = _assert_float_dtype(_get_dtype(dtype))
        # Orthogonality is only defined for matrices of rank >= 2.
        if len(shape) < 2:
            raise ValueError(
                "The tensor to initialize must be "
                "at least two-dimensional. Received: "
                f"shape={shape} of rank {len(shape)}."
            )
        self._warn_reuse()
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._generate_init_val(shape, dtype)
        _ensure_keras_seeded()
        return utils.call_with_layout(
            self._generate_init_val, layout, shape=shape, dtype=dtype
        )

    def _generate_init_val(self, shape, dtype):
        # Collapse all leading dimensions into rows so the same code path
        # serves both dense kernels and e.g. conv2d kernels.
        num_cols = shape[-1]
        num_rows = 1
        for dim in shape[:-1]:
            num_rows *= dim
        flat_shape = (max(num_rows, num_cols), min(num_rows, num_cols))

        # QR-decompose a random Gaussian matrix to obtain an orthogonal factor.
        gaussian = self._random_generator.random_normal(flat_shape, dtype=dtype)
        q, r = tf.linalg.qr(gaussian, full_matrices=False)
        # Multiplying by the sign of R's diagonal makes Q's distribution
        # uniform over orthogonal matrices.
        q *= tf.sign(tf.linalg.tensor_diag_part(r))
        if num_rows < num_cols:
            q = tf.linalg.matrix_transpose(q)
        return self.gain * tf.reshape(q, shape)

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(gain=self.gain, seed=self.seed)
@keras_export(
    "keras.initializers.Identity", "keras.initializers.identity", v1=[]
)
class Identity(Initializer):
    """Initializer that generates the identity matrix.

    Also available via the shortcut function `tf.keras.initializers.identity`.

    Only usable for generating 2D matrices.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Identity()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Identity()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        gain: Multiplicative factor to apply to the identity matrix.
    """

    def __init__(self, gain=1.0):
        self.gain = gain

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to a 2D identity matrix.

        Args:
            shape: Shape of the tensor. It should have exactly rank 2.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise
                (via `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(
            self.__class__.__name__, kwargs, support_partition=False
        )
        dtype = _assert_float_dtype(_get_dtype(dtype))
        if len(shape) != 2:
            raise ValueError(
                "Identity matrix initializer can only be used for 2D matrices. "
                f"Received: shape={shape} of rank {len(shape)}."
            )
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._generate_init_val(shape, dtype)
        return utils.call_with_layout(
            self._generate_init_val, layout, shape=shape, dtype=dtype
        )

    def _generate_init_val(self, shape, dtype):
        # shape is guaranteed rank-2 by __call__; eye handles non-square too.
        num_rows, num_cols = shape
        return self.gain * tf.eye(num_rows, num_cols, dtype=dtype)

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(gain=self.gain)
@keras_export(
    "keras.initializers.GlorotUniform",
    "keras.initializers.glorot_uniform",
    v1=[],
)
class GlorotUniform(VarianceScaling):
    """The Glorot uniform initializer, also called Xavier uniform initializer.

    Also available via the shortcut function
    `tf.keras.initializers.glorot_uniform`.

    Draws samples from a uniform distribution within `[-limit, limit]`, where
    `limit = sqrt(6 / (fan_in + fan_out))` (`fan_in` is the number of input
    units in the weight tensor and `fan_out` is the number of output units).

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.GlorotUniform()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.GlorotUniform()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will not produce the same
            random values across multiple calls, but multiple initializers will
            produce the same sequence when constructed with the same seed value.

    References:
        - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
    """

    def __init__(self, seed=None):
        # Glorot uniform is variance scaling with fan-averaged uniform draws.
        super().__init__(
            scale=1.0,
            mode="fan_avg",
            distribution="uniform",
            seed=seed,
        )

    def get_config(self):
        """Only the seed is configurable; scale/mode/distribution are fixed."""
        return dict(seed=self.seed)
@keras_export(
    "keras.initializers.GlorotNormal", "keras.initializers.glorot_normal", v1=[]
)
class GlorotNormal(VarianceScaling):
    """The Glorot normal initializer, also called Xavier normal initializer.

    Also available via the shortcut function
    `tf.keras.initializers.glorot_normal`.

    Draws samples from a truncated normal distribution centered on 0 with
    `stddev = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number of
    input units in the weight tensor and `fan_out` is the number of output units
    in the weight tensor.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.GlorotNormal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.GlorotNormal()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will not produce the same
            random values across multiple calls, but multiple initializers will
            produce the same sequence when constructed with the same seed value.

    References:
        - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
    """

    def __init__(self, seed=None):
        # Glorot normal is variance scaling with fan-averaged truncated draws.
        super().__init__(
            seed=seed,
            scale=1.0,
            mode="fan_avg",
            distribution="truncated_normal",
        )

    def get_config(self):
        """Only the seed is configurable; scale/mode/distribution are fixed."""
        return dict(seed=self.seed)
@keras_export(
    "keras.initializers.LecunNormal", "keras.initializers.lecun_normal", v1=[]
)
class LecunNormal(VarianceScaling):
    """Lecun normal initializer.

    Also available via the shortcut function
    `tf.keras.initializers.lecun_normal`.

    Initializers allow you to pre-specify an initialization strategy, encoded in
    the Initializer object, without knowing the shape and dtype of the variable
    being initialized.

    Draws samples from a truncated normal distribution centered on 0 with
    `stddev = sqrt(1 / fan_in)` where `fan_in` is the number of input units in
    the weight tensor.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.LecunNormal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.LecunNormal()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will not produce the same
            random values across multiple calls, but multiple initializers will
            produce the same sequence when constructed with the same seed value.

    References:
        - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
    """

    def __init__(self, seed=None):
        # LeCun normal is variance scaling over fan-in with truncated draws.
        super().__init__(
            seed=seed,
            scale=1.0,
            mode="fan_in",
            distribution="truncated_normal",
        )

    def get_config(self):
        """Only the seed is configurable; scale/mode/distribution are fixed."""
        return dict(seed=self.seed)
@keras_export(
    "keras.initializers.LecunUniform", "keras.initializers.lecun_uniform", v1=[]
)
class LecunUniform(VarianceScaling):
    """Lecun uniform initializer.

    Also available via the shortcut function
    `tf.keras.initializers.lecun_uniform`.

    Samples are drawn from a uniform distribution within `[-limit, limit]`,
    where `limit = sqrt(3 / fan_in)` (`fan_in` is the number of input units
    in the weight tensor).

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.LecunUniform()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.LecunUniform()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
      seed: A Python integer. Used to make the behavior of the initializer
        deterministic. Note that a seeded initializer will not produce the
        same random values across multiple calls, but multiple initializers
        will produce the same sequence when constructed with the same seed
        value.

    References:
      - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
    """

    def __init__(self, seed=None):
        # LeCun scaling (variance 1 / fan_in) with a uniform draw.
        config = dict(scale=1.0, mode="fan_in", distribution="uniform")
        super().__init__(seed=seed, **config)

    def get_config(self):
        return dict(seed=self.seed)
@keras_export(
    "keras.initializers.HeNormal", "keras.initializers.he_normal", v1=[]
)
class HeNormal(VarianceScaling):
    """He normal initializer.

    Also available via the shortcut function
    `tf.keras.initializers.he_normal`.

    Samples are drawn from a truncated normal distribution centered on 0
    with `stddev = sqrt(2 / fan_in)`, where `fan_in` is the number of input
    units in the weight tensor.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.HeNormal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.HeNormal()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
      seed: A Python integer. Used to make the behavior of the initializer
        deterministic. Note that a seeded initializer will not produce the
        same random values across multiple calls, but multiple initializers
        will produce the same sequence when constructed with the same seed
        value.

    References:
      - [He et al., 2015](https://arxiv.org/abs/1502.01852)
    """

    def __init__(self, seed=None):
        # He scaling: variance 2 / fan_in, truncated normal draw.
        super().__init__(
            distribution="truncated_normal",
            mode="fan_in",
            scale=2.0,
            seed=seed,
        )

    def get_config(self):
        return {"seed": self.seed}
@keras_export(
    "keras.initializers.HeUniform", "keras.initializers.he_uniform", v1=[]
)
class HeUniform(VarianceScaling):
    """He uniform variance scaling initializer.

    Also available via the shortcut function
    `tf.keras.initializers.he_uniform`.

    Samples are drawn from a uniform distribution within `[-limit, limit]`,
    where `limit = sqrt(6 / fan_in)` (`fan_in` is the number of input units
    in the weight tensor).

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.HeUniform()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.HeUniform()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
      seed: A Python integer. Used to make the behavior of the initializer
        deterministic. Note that a seeded initializer will not produce the
        same random values across multiple calls, but multiple initializers
        will produce the same sequence when constructed with the same seed
        value.

    References:
      - [He et al., 2015](https://arxiv.org/abs/1502.01852)
    """

    def __init__(self, seed=None):
        # He scaling (variance 2 / fan_in) with a uniform draw.
        config = dict(scale=2.0, mode="fan_in", distribution="uniform")
        super().__init__(seed=seed, **config)

    def get_config(self):
        return dict(seed=self.seed)
def _get_dtype(dtype):
    """Resolve `dtype` to a TF DType, defaulting to the Keras backend floatx.

    Args:
      dtype: A dtype-convertible value, or None to use `backend.floatx()`.

    Returns:
      The corresponding `tf.DType`.
    """
    if dtype is None:
        return tf.as_dtype(backend.floatx())
    return tf.as_dtype(dtype)
def _assert_float_dtype(dtype):
    """Validate and return floating point type based on `dtype`.

    `dtype` must be a floating point type.

    Args:
      dtype: The data type to validate.

    Returns:
      Validated type.

    Raises:
      ValueError: if `dtype` is not a floating point type.
    """
    dtype = tf.as_dtype(dtype)
    if dtype.is_floating:
        return dtype
    raise ValueError(f"Expected floating point type, got {dtype}.")
1136def _compute_fans(shape):
1137 """Computes the number of input and output units for a weight shape.
1139 Args:
1140 shape: Integer shape tuple or TF tensor shape.
1142 Returns:
1143 A tuple of integer scalars (fan_in, fan_out).
1144 """
1145 if len(shape) < 1: # Just to avoid errors for constants.
1146 fan_in = fan_out = 1
1147 elif len(shape) == 1:
1148 fan_in = fan_out = shape[0]
1149 elif len(shape) == 2:
1150 fan_in = shape[0]
1151 fan_out = shape[1]
1152 else:
1153 # Assuming convolution kernels (2D, 3D, or more).
1154 # kernel shape: (..., input_depth, depth)
1155 receptive_field_size = 1
1156 for dim in shape[:-2]:
1157 receptive_field_size *= dim
1158 fan_in = shape[-2] * receptive_field_size
1159 fan_out = shape[-1] * receptive_field_size
1160 return int(fan_in), int(fan_out)
def _validate_kwargs(cls_name, kwargs, support_partition=True):
    """Reject initializer kwargs outside the supported set.

    Args:
      cls_name: Name of the initializer class, used in error messages.
      kwargs: Mapping of keyword arguments passed to the initializer call.
      support_partition: Whether `partition_shape` / `partition_offset`
        are acceptable for this initializer.

    Raises:
      TypeError: If `kwargs` contains any key outside
        `_ALLOWED_INITIALIZER_KWARGS`.
      ValueError: If partition arguments are passed but not supported.
    """
    unknown = [key for key in kwargs if key not in _ALLOWED_INITIALIZER_KWARGS]
    if unknown:
        raise TypeError(
            f"Unknown keyword arguments: {unknown}. Allowed "
            f"keyword arguments: {_ALLOWED_INITIALIZER_KWARGS}."
        )
    uses_partition = _PARTITION_SHAPE in kwargs or _PARTITION_OFFSET in kwargs
    if uses_partition and not support_partition:
        raise ValueError(
            f"{cls_name} initializer doesn't support "
            "partition-related arguments."
        )
def _ensure_keras_seeded():
    """Make sure the keras.backend global seed generator is set.

    This matters for the DTensor use case: every client must be initialized
    with the same seed for `tf.random.Generator` so that the values created
    stay in sync among all the clients.

    Raises:
      ValueError: If no global seed generator has been configured.
    """
    seed_generator = getattr(backend._SEED_GENERATOR, "generator", None)
    if not seed_generator:
        raise ValueError(
            "When using DTensor APIs, you need to set the global seed "
            "before using any Keras initializers. Please make sure "
            "to call `tf.keras.utils.set_random_seed()` in your code."
        )