Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/initializers/initializers.py: 30%
316 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Keras initializers."""
17import math
18import warnings
20import tensorflow.compat.v2 as tf
22from keras.src import backend
23from keras.src.dtensor import utils
24from keras.src.saving import serialization_lib
26# isort: off
27from tensorflow.python.util.tf_export import keras_export
# Keyword-argument names that initializer `__call__` methods recognize:
# partitioned-variable bookkeeping and DTensor layout placement.
_PARTITION_SHAPE = "partition_shape"
_PARTITION_OFFSET = "partition_offset"
_LAYOUT = "layout"
# The full set of extra kwargs an initializer call may receive.
_ALLOWED_INITIALIZER_KWARGS = [_PARTITION_SHAPE, _PARTITION_OFFSET, _LAYOUT]
@keras_export("keras.initializers.Initializer")
class Initializer:
    """Initializer base class: all Keras initializers inherit from this class.

    Subclasses must implement a `__call__()` method with the signature:

    ```python
    def __call__(self, shape, dtype=None, **kwargs):
        # returns a tensor of shape `shape` and dtype `dtype`
        # containing values drawn from a distribution of your choice.
        return tf.random.uniform(shape=shape, dtype=dtype)
    ```

    Optionally, you can also implement the method `get_config()` and the
    class method `from_config()` in order to support serialization -- just
    like with any Keras object.

    Here's a simple example: a random normal initializer.

    ```python
    class ExampleRandomNormal(Initializer):
        def __init__(self, mean, stddev):
            self.mean = mean
            self.stddev = stddev

        def __call__(self, shape, dtype=None, **kwargs):
            return tf.random.normal(
                shape, mean=self.mean, stddev=self.stddev, dtype=dtype
            )

        def get_config(self):  # To support serialization
            return {"mean": self.mean, "stddev": self.stddev}
    ```

    Note that `from_config()` does not need to be overridden when, as in the
    example above, the constructor arguments and the keys in the config
    returned by `get_config()` are the same. In that case, the default
    `from_config()` works fine.
    """

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor.
            **kwargs: Additional keyword arguments.
        """
        raise NotImplementedError(
            "Initializer subclasses must implement the `__call__()` method."
        )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict.

        Returns:
            A JSON-serializable Python dict.
        """
        return {}

    @classmethod
    def from_config(cls, config):
        """Instantiates an initializer from a configuration dictionary.

        Example:

        ```python
        initializer = RandomUniform(-1, 1)
        config = initializer.get_config()
        initializer = RandomUniform.from_config(config)
        ```

        Args:
            config: A Python dictionary, the output of `get_config()`.

        Returns:
            An `Initializer` instance.
        """
        # Legacy configs may carry a `dtype` key that is not a constructor
        # argument; drop it before instantiating.
        config.pop("dtype", None)
        return cls(**config)

    def _warn_reuse(self):
        # Warn only from the second call onwards of an unseeded instance,
        # since reusing it then yields identical values on every call.
        if not getattr(self, "_used", False):
            self._used = True
        elif getattr(self, "seed", None) is None:
            warnings.warn(
                f"The initializer {self.__class__.__name__} is unseeded "
                "and being called multiple times, which will return "
                "identical values each time (even if the initializer is "
                "unseeded). Please update your code to provide a seed to "
                "the initializer, or avoid using the same initializer "
                "instance more than once."
            )
@keras_export("keras.initializers.Zeros", "keras.initializers.zeros", v1=[])
class Zeros(Initializer):
    """Initializer that generates tensors initialized to 0.

    Also available via the shortcut function `tf.keras.initializers.zeros`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Zeros()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Zeros()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
    """

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only numeric or boolean dtypes
                are supported. If not specified, `keras.backend.floatx()` is
                used, which defaults to `float32` unless you configured it
                otherwise (via `keras.backend.set_floatx(float_dtype)`).
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _get_dtype(dtype)
        if dtype == tf.string or not dtype.is_numpy_compatible:
            raise ValueError(f"Expected numeric or boolean dtype, got {dtype}.")
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        layout = kwargs.pop("layout", None)
        if not layout:
            return tf.zeros(shape, dtype)
        return utils.call_with_layout(tf.zeros, layout, shape=shape, dtype=dtype)
@keras_export("keras.initializers.Ones", "keras.initializers.ones", v1=[])
class Ones(Initializer):
    """Initializer that generates tensors initialized to 1.

    Also available via the shortcut function `tf.keras.initializers.ones`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Ones()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Ones()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)
    """

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only numeric or boolean dtypes
                are supported. If not specified, `keras.backend.floatx()` is
                used, which defaults to `float32` unless you configured it
                otherwise (via `keras.backend.set_floatx(float_dtype)`).
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _get_dtype(dtype)
        if dtype == tf.string or not dtype.is_numpy_compatible:
            raise ValueError(f"Expected numeric or boolean dtype, got {dtype}.")
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        layout = kwargs.pop("layout", None)
        if not layout:
            return tf.ones(shape, dtype)
        return utils.call_with_layout(tf.ones, layout, shape=shape, dtype=dtype)
@keras_export(
    "keras.initializers.Constant", "keras.initializers.constant", v1=[]
)
class Constant(Initializer):
    """Initializer that generates tensors with constant values.

    Also available via the shortcut function `tf.keras.initializers.constant`.

    Only scalar values are allowed.
    The constant value provided must be convertible to the dtype requested
    when calling the initializer.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Constant(3.)
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Constant(3.)
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        value: A Python scalar.
    """

    def __init__(self, value=0):
        self.value = value

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to `self.value`.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. If not specified,
                `keras.backend.floatx()` is used,
                which defaults to `float32` unless you configured it
                otherwise (via `keras.backend.set_floatx(float_dtype)`).
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _get_dtype(dtype)
        if _PARTITION_SHAPE in kwargs:
            # A partitioned variable supplies the shape of its own shard.
            shape = kwargs[_PARTITION_SHAPE]
        layout = kwargs.pop("layout", None)
        if layout:
            return utils.call_with_layout(
                tf.constant, layout, self.value, shape=shape, dtype=dtype
            )
        # `dtype` was already normalized above; do not recompute it here.
        return tf.constant(self.value, dtype=dtype, shape=shape)

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return {"value": self.value}

    @classmethod
    def from_config(cls, config):
        """Instantiates the initializer, deserializing a wrapped `value`.

        Args:
            config: A Python dictionary, the output of `get_config()`.

        Returns:
            A `Constant` instance.
        """
        config.pop("dtype", None)
        if "value" in config:
            # `value` may arrive as a serialized Keras object (e.g. from a
            # saved model config); restore it before constructing.
            if isinstance(config["value"], dict):
                config["value"] = serialization_lib.deserialize_keras_object(
                    config["value"]
                )
        return cls(**config)
@keras_export(
    "keras.initializers.RandomUniform",
    "keras.initializers.random_uniform",
    v1=[],
)
class RandomUniform(Initializer):
    """Initializer that generates tensors with a uniform distribution.

    Also available via the shortcut function
    `tf.keras.initializers.random_uniform`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.RandomUniform(minval=0., maxval=1.)
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.RandomUniform(minval=0., maxval=1.)
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        minval: A python scalar or a scalar tensor. Lower bound of the range of
            random values to generate (inclusive).
        maxval: A python scalar or a scalar tensor. Upper bound of the range of
            random values to generate (exclusive).
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.
    """

    def __init__(self, minval=-0.05, maxval=0.05, seed=None):
        self.minval = minval
        self.maxval = maxval
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point and integer
                types are supported. If not specified,
                `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise
                (via `tf.keras.backend.set_floatx(float_dtype)`).
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _get_dtype(dtype)
        if not (dtype.is_floating or dtype.is_integer):
            raise ValueError(f"Expected float or integer dtype, got {dtype}.")
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        partition_offset = kwargs.get(_PARTITION_OFFSET, None)
        if partition_offset is None:
            # Partitioned variables invoke the same initializer once per
            # partition, so reuse there is expected and not warned about.
            self._warn_reuse()
        # The offset salts the stateless RNG so each shard gets distinct values.
        nonce = hash(partition_offset) if partition_offset else None
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._random_generator.random_uniform(
                shape, self.minval, self.maxval, dtype, nonce
            )
        _ensure_keras_seeded()
        return utils.call_with_layout(
            self._random_generator.random_uniform,
            layout,
            shape,
            self.minval,
            self.maxval,
            dtype,
            nonce,
        )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(minval=self.minval, maxval=self.maxval, seed=self.seed)
@keras_export(
    "keras.initializers.RandomNormal", "keras.initializers.random_normal", v1=[]
)
class RandomNormal(Initializer):
    """Initializer that generates tensors with a normal distribution.

    Also available via the shortcut function
    `tf.keras.initializers.random_normal`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        mean: a python scalar or a scalar tensor. Mean of the random values to
            generate.
        stddev: a python scalar or a scalar tensor. Standard deviation of the
            random values to generate.
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.
    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
        self.mean = mean
        self.stddev = stddev
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to random normal values.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise (via
                `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _assert_float_dtype(_get_dtype(dtype))
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        partition_offset = kwargs.get(_PARTITION_OFFSET, None)
        if partition_offset is None:
            # Partitioned variables invoke the same initializer once per
            # partition, so reuse there is expected and not warned about.
            self._warn_reuse()
        # The offset salts the stateless RNG so each shard gets distinct values.
        nonce = hash(partition_offset) if partition_offset else None
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._random_generator.random_normal(
                shape, self.mean, self.stddev, dtype, nonce
            )
        _ensure_keras_seeded()
        return utils.call_with_layout(
            self._random_generator.random_normal,
            layout,
            shape,
            self.mean,
            self.stddev,
            dtype,
            nonce,
        )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(mean=self.mean, stddev=self.stddev, seed=self.seed)
@keras_export(
    "keras.initializers.TruncatedNormal",
    "keras.initializers.truncated_normal",
    v1=[],
)
class TruncatedNormal(Initializer):
    """Initializer that generates a truncated normal distribution.

    Also available via the shortcut function
    `tf.keras.initializers.truncated_normal`.

    The values generated are similar to values from a
    `tf.keras.initializers.RandomNormal` initializer except that values more
    than two standard deviations from the mean are
    discarded and re-drawn.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.TruncatedNormal(mean=0., stddev=1.)
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.TruncatedNormal(mean=0., stddev=1.)
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        mean: a python scalar or a scalar tensor. Mean of the random values
            to generate.
        stddev: a python scalar or a scalar tensor. Standard deviation of the
            random values to generate before truncation.
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.
    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
        self.mean = mean
        self.stddev = stddev
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor initialized to random normal values (truncated).

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise (via
                `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _assert_float_dtype(_get_dtype(dtype))
        # A partitioned variable supplies the shape of its own shard.
        shape = kwargs.get(_PARTITION_SHAPE, shape)
        partition_offset = kwargs.get(_PARTITION_OFFSET, None)
        if partition_offset is None:
            # Partitioned variables invoke the same initializer once per
            # partition, so reuse there is expected and not warned about.
            self._warn_reuse()
        # The offset salts the stateless RNG so each shard gets distinct values.
        nonce = hash(partition_offset) if partition_offset else None
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._random_generator.truncated_normal(
                shape, self.mean, self.stddev, dtype, nonce
            )
        # TODO(scottzhu): Remove this once the forward compat period above
        # is expired.
        self._random_generator._rng_type = (
            self._random_generator.RNG_STATEFUL
        )
        _ensure_keras_seeded()
        return utils.call_with_layout(
            self._random_generator.truncated_normal,
            layout,
            shape,
            self.mean,
            self.stddev,
            dtype,
            nonce,
        )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(mean=self.mean, stddev=self.stddev, seed=self.seed)
@keras_export(
    "keras.initializers.VarianceScaling",
    "keras.initializers.variance_scaling",
    v1=[],
)
class VarianceScaling(Initializer):
    """Initializer that adapts its scale to the shape of its input tensors.

    Also available via the shortcut function
    `tf.keras.initializers.variance_scaling`.

    With `distribution="truncated_normal" or "untruncated_normal"`, samples are
    drawn from a truncated/untruncated normal distribution with a mean of zero
    and a standard deviation (after truncation, if used) `stddev = sqrt(scale /
    n)`, where `n` is:

    - number of input units in the weight tensor, if `mode="fan_in"`
    - number of output units, if `mode="fan_out"`
    - average of the numbers of input and output units, if `mode="fan_avg"`

    With `distribution="uniform"`, samples are drawn from a uniform distribution
    within `[-limit, limit]`, where `limit = sqrt(3 * scale / n)`.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.VarianceScaling(
    ...     scale=0.1, mode='fan_in', distribution='uniform')
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.VarianceScaling(
    ...     scale=0.1, mode='fan_in', distribution='uniform')
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        scale: Scaling factor (positive float).
        mode: One of `"fan_in"`, `"fan_out"`, `"fan_avg"`.
        distribution: Random distribution to use. One of `"truncated_normal"`,
            `"untruncated_normal"`, or `"uniform"`.
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.
    """

    def __init__(
        self,
        scale=1.0,
        mode="fan_in",
        distribution="truncated_normal",
        seed=None,
    ):
        if scale <= 0.0:
            raise ValueError(
                f"`scale` must be positive float. Received: scale={scale}."
            )
        allowed_modes = {"fan_in", "fan_out", "fan_avg"}
        if mode not in allowed_modes:
            raise ValueError(
                f"Invalid `mode` argument: {mode}. "
                f"Please use one of the {allowed_modes}."
            )
        distribution = distribution.lower()
        # Compatibility with keras-team/keras.
        if distribution == "normal":
            distribution = "truncated_normal"
        allowed_distributions = {
            "uniform",
            "truncated_normal",
            "untruncated_normal",
        }
        if distribution not in allowed_distributions:
            # Note: the two fragments previously ran together with no space
            # ("...argument: uniform.Allowed..."); keep the separator.
            raise ValueError(
                f"Invalid `distribution` argument: {distribution}. "
                f"Allowed distributions: {allowed_distributions}."
            )
        self.scale = scale
        self.mode = mode
        self.distribution = distribution
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise (via
                `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(self.__class__.__name__, kwargs)
        dtype = _assert_float_dtype(_get_dtype(dtype))
        if _PARTITION_SHAPE in kwargs:
            # A partitioned variable supplies the shape of its own shard.
            shape = kwargs[_PARTITION_SHAPE]
        partition_offset = kwargs.get(_PARTITION_OFFSET, None)
        if partition_offset is None:
            # We skip the reuse warning for partitioned variable, since the same
            # initializer will be called multiple times for each partition.
            self._warn_reuse()
        # The offset salts the stateless RNG so each shard gets distinct values.
        nonce = hash(partition_offset) if partition_offset else None
        layout = kwargs.pop("layout", None)
        if layout:
            _ensure_keras_seeded()
            return utils.call_with_layout(
                self._generate_init_val,
                layout,
                shape=shape,
                dtype=dtype,
                nonce=nonce,
            )
        return self._generate_init_val(shape=shape, dtype=dtype, nonce=nonce)

    def _generate_init_val(self, shape, dtype, nonce):
        # Derive the effective scale from the fan of the weight tensor, then
        # draw from the configured distribution with matched variance.
        scale = self.scale
        fan_in, fan_out = _compute_fans(shape)
        if self.mode == "fan_in":
            scale /= max(1.0, fan_in)
        elif self.mode == "fan_out":
            scale /= max(1.0, fan_out)
        else:
            scale /= max(1.0, (fan_in + fan_out) / 2.0)
        if self.distribution == "truncated_normal":
            # constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0.,
            # scale=1.)
            stddev = math.sqrt(scale) / 0.87962566103423978
            return self._random_generator.truncated_normal(
                shape, 0.0, stddev, dtype, nonce
            )
        elif self.distribution == "untruncated_normal":
            stddev = math.sqrt(scale)
            return self._random_generator.random_normal(
                shape, 0.0, stddev, dtype, nonce
            )
        else:
            limit = math.sqrt(3.0 * scale)
            return self._random_generator.random_uniform(
                shape, -limit, limit, dtype, nonce
            )

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return {
            "scale": self.scale,
            "mode": self.mode,
            "distribution": self.distribution,
            "seed": self.seed,
        }
@keras_export(
    "keras.initializers.Orthogonal", "keras.initializers.orthogonal", v1=[]
)
class Orthogonal(Initializer):
    """Initializer that generates an orthogonal matrix.

    Also available via the shortcut function `tf.keras.initializers.orthogonal`.

    If the shape of the tensor to initialize is two-dimensional, it is
    initialized with an orthogonal matrix obtained from the QR decomposition of
    a matrix of random numbers drawn from a normal distribution. If the matrix
    has fewer rows than columns then the output will have orthogonal rows.
    Otherwise, the output will have orthogonal columns.

    If the shape of the tensor to initialize is more than two-dimensional,
    a matrix of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])`
    is initialized, where `n` is the length of the shape vector.
    The matrix is subsequently reshaped to give a tensor of the desired shape.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Orthogonal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Orthogonal()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        gain: multiplicative factor to apply to the orthogonal matrix
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will produce the same
            random values across multiple calls.

    References:
        - [Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C)
    """

    def __init__(self, gain=1.0, seed=None):
        self.gain = gain
        self.seed = seed
        self._random_generator = backend.RandomGenerator(
            seed, rng_type="stateless"
        )

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to an orthogonal matrix.

        Args:
            shape: Shape of the tensor.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise
                (via `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(
            self.__class__.__name__, kwargs, support_partition=False
        )
        dtype = _assert_float_dtype(_get_dtype(dtype))
        # Orthogonality is only defined for matrices of rank >= 2.
        if len(shape) < 2:
            raise ValueError(
                "The tensor to initialize must be "
                "at least two-dimensional. Received: "
                f"shape={shape} of rank {len(shape)}."
            )
        self._warn_reuse()
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._generate_init_val(shape, dtype)
        _ensure_keras_seeded()
        return utils.call_with_layout(
            self._generate_init_val, layout, shape=shape, dtype=dtype
        )

    def _generate_init_val(self, shape, dtype):
        # Collapse all leading dimensions into rows so the same code path
        # serves both dense kernels and e.g. conv2d kernels.
        num_cols = shape[-1]
        num_rows = 1
        for dim in shape[:-1]:
            num_rows *= dim
        flat_shape = (max(num_rows, num_cols), min(num_rows, num_cols))

        # QR-decompose a random Gaussian matrix to obtain an orthogonal factor.
        gaussian = self._random_generator.random_normal(flat_shape, dtype=dtype)
        q, r = tf.linalg.qr(gaussian, full_matrices=False)
        # Multiplying by the sign of R's diagonal makes Q's distribution
        # uniform over orthogonal matrices.
        q *= tf.sign(tf.linalg.tensor_diag_part(r))
        if num_rows < num_cols:
            q = tf.linalg.matrix_transpose(q)
        return self.gain * tf.reshape(q, shape)

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(gain=self.gain, seed=self.seed)
@keras_export(
    "keras.initializers.Identity", "keras.initializers.identity", v1=[]
)
class Identity(Initializer):
    """Initializer that generates the identity matrix.

    Also available via the shortcut function `tf.keras.initializers.identity`.

    Only usable for generating 2D matrices.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.Identity()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.Identity()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        gain: Multiplicative factor to apply to the identity matrix.
    """

    def __init__(self, gain=1.0):
        self.gain = gain

    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to a 2D identity matrix.

        Args:
            shape: Shape of the tensor. It should have exactly rank 2.
            dtype: Optional dtype of the tensor. Only floating point types are
                supported. If not specified, `tf.keras.backend.floatx()` is used,
                which default to `float32` unless you configured it otherwise
                (via `tf.keras.backend.set_floatx(float_dtype)`)
            **kwargs: Additional keyword arguments.
        """
        _validate_kwargs(
            self.__class__.__name__, kwargs, support_partition=False
        )
        dtype = _assert_float_dtype(_get_dtype(dtype))
        if len(shape) != 2:
            raise ValueError(
                "Identity matrix initializer can only be used for 2D matrices. "
                f"Received: shape={shape} of rank {len(shape)}."
            )
        layout = kwargs.pop("layout", None)
        if not layout:
            return self._generate_init_val(shape, dtype)
        return utils.call_with_layout(
            self._generate_init_val, layout, shape=shape, dtype=dtype
        )

    def _generate_init_val(self, shape, dtype):
        # shape is guaranteed rank-2 by __call__; eye handles non-square too.
        num_rows, num_cols = shape
        return self.gain * tf.eye(num_rows, num_cols, dtype=dtype)

    def get_config(self):
        """Returns the initializer's configuration as a JSON-serializable dict."""
        return dict(gain=self.gain)
@keras_export(
    "keras.initializers.GlorotUniform",
    "keras.initializers.glorot_uniform",
    v1=[],
)
class GlorotUniform(VarianceScaling):
    """The Glorot uniform initializer, also called Xavier uniform initializer.

    Also available via the shortcut function
    `tf.keras.initializers.glorot_uniform`.

    Draws samples from a uniform distribution within `[-limit, limit]`, where
    `limit = sqrt(6 / (fan_in + fan_out))` (`fan_in` is the number of input
    units in the weight tensor and `fan_out` is the number of output units).

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.GlorotUniform()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.GlorotUniform()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will not produce the same
            random values across multiple calls, but multiple initializers will
            produce the same sequence when constructed with the same seed value.

    References:
        - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
    """

    def __init__(self, seed=None):
        # Glorot uniform is variance scaling with fan-averaged uniform draws.
        super().__init__(
            scale=1.0,
            mode="fan_avg",
            distribution="uniform",
            seed=seed,
        )

    def get_config(self):
        """Only the seed is configurable; scale/mode/distribution are fixed."""
        return dict(seed=self.seed)
@keras_export(
    "keras.initializers.GlorotNormal", "keras.initializers.glorot_normal", v1=[]
)
class GlorotNormal(VarianceScaling):
    """The Glorot normal initializer, also called Xavier normal initializer.

    Also available via the shortcut function
    `tf.keras.initializers.glorot_normal`.

    Draws samples from a truncated normal distribution centered on 0 with
    `stddev = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number of
    input units in the weight tensor and `fan_out` is the number of output units
    in the weight tensor.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.GlorotNormal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.GlorotNormal()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will not produce the same
            random values across multiple calls, but multiple initializers will
            produce the same sequence when constructed with the same seed value.

    References:
        - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
    """

    def __init__(self, seed=None):
        # Glorot normal is variance scaling with fan-averaged truncated draws.
        super().__init__(
            seed=seed,
            scale=1.0,
            mode="fan_avg",
            distribution="truncated_normal",
        )

    def get_config(self):
        """Only the seed is configurable; scale/mode/distribution are fixed."""
        return dict(seed=self.seed)
@keras_export(
    "keras.initializers.LecunNormal", "keras.initializers.lecun_normal", v1=[]
)
class LecunNormal(VarianceScaling):
    """Lecun normal initializer.

    Also available via the shortcut function
    `tf.keras.initializers.lecun_normal`.

    Initializers allow you to pre-specify an initialization strategy, encoded in
    the Initializer object, without knowing the shape and dtype of the variable
    being initialized.

    Draws samples from a truncated normal distribution centered on 0 with
    `stddev = sqrt(1 / fan_in)` where `fan_in` is the number of input units in
    the weight tensor.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.LecunNormal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.LecunNormal()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
        seed: A Python integer. Used to make the behavior of the initializer
            deterministic. Note that a seeded initializer will not produce the same
            random values across multiple calls, but multiple initializers will
            produce the same sequence when constructed with the same seed value.

    References:
        - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
    """

    def __init__(self, seed=None):
        # LeCun normal is variance scaling over fan-in with truncated draws.
        super().__init__(
            seed=seed,
            scale=1.0,
            mode="fan_in",
            distribution="truncated_normal",
        )

    def get_config(self):
        """Only the seed is configurable; scale/mode/distribution are fixed."""
        return dict(seed=self.seed)
@keras_export(
    "keras.initializers.LecunUniform", "keras.initializers.lecun_uniform", v1=[]
)
class LecunUniform(VarianceScaling):
    """Lecun uniform initializer.

    Also available via the shortcut function
    `tf.keras.initializers.lecun_uniform`.

    Samples are drawn from a uniform distribution within `[-limit, limit]`,
    where `limit = sqrt(3 / fan_in)` (`fan_in` is the number of input units
    in the weight tensor).

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.LecunUniform()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.LecunUniform()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
      seed: A Python integer. Used to make the behavior of the initializer
        deterministic. Note that a seeded initializer will not produce the
        same random values across multiple calls, but multiple initializers
        will produce the same sequence when constructed with the same seed
        value.

    References:
      - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
    """

    def __init__(self, seed=None):
        # LeCun scaling (variance 1 / fan_in) with a uniform draw.
        config = dict(scale=1.0, mode="fan_in", distribution="uniform")
        super().__init__(seed=seed, **config)

    def get_config(self):
        return dict(seed=self.seed)
@keras_export(
    "keras.initializers.HeNormal", "keras.initializers.he_normal", v1=[]
)
class HeNormal(VarianceScaling):
    """He normal initializer.

    Also available via the shortcut function
    `tf.keras.initializers.he_normal`.

    Samples are drawn from a truncated normal distribution centered on 0
    with `stddev = sqrt(2 / fan_in)`, where `fan_in` is the number of input
    units in the weight tensor.

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.HeNormal()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.HeNormal()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
      seed: A Python integer. Used to make the behavior of the initializer
        deterministic. Note that a seeded initializer will not produce the
        same random values across multiple calls, but multiple initializers
        will produce the same sequence when constructed with the same seed
        value.

    References:
      - [He et al., 2015](https://arxiv.org/abs/1502.01852)
    """

    def __init__(self, seed=None):
        # He scaling: variance 2 / fan_in, truncated normal draw.
        super().__init__(
            distribution="truncated_normal",
            mode="fan_in",
            scale=2.0,
            seed=seed,
        )

    def get_config(self):
        return {"seed": self.seed}
@keras_export(
    "keras.initializers.HeUniform", "keras.initializers.he_uniform", v1=[]
)
class HeUniform(VarianceScaling):
    """He uniform variance scaling initializer.

    Also available via the shortcut function
    `tf.keras.initializers.he_uniform`.

    Samples are drawn from a uniform distribution within `[-limit, limit]`,
    where `limit = sqrt(6 / fan_in)` (`fan_in` is the number of input units
    in the weight tensor).

    Examples:

    >>> # Standalone usage:
    >>> initializer = tf.keras.initializers.HeUniform()
    >>> values = initializer(shape=(2, 2))

    >>> # Usage in a Keras layer:
    >>> initializer = tf.keras.initializers.HeUniform()
    >>> layer = tf.keras.layers.Dense(3, kernel_initializer=initializer)

    Args:
      seed: A Python integer. Used to make the behavior of the initializer
        deterministic. Note that a seeded initializer will not produce the
        same random values across multiple calls, but multiple initializers
        will produce the same sequence when constructed with the same seed
        value.

    References:
      - [He et al., 2015](https://arxiv.org/abs/1502.01852)
    """

    def __init__(self, seed=None):
        # He scaling (variance 2 / fan_in) with a uniform draw.
        config = dict(scale=2.0, mode="fan_in", distribution="uniform")
        super().__init__(seed=seed, **config)

    def get_config(self):
        return dict(seed=self.seed)
def _get_dtype(dtype):
    """Resolve `dtype` to a TF DType, defaulting to the Keras backend floatx.

    Args:
      dtype: A dtype-convertible value, or None to use `backend.floatx()`.

    Returns:
      The corresponding `tf.DType`.
    """
    if dtype is None:
        return tf.as_dtype(backend.floatx())
    return tf.as_dtype(dtype)
def _assert_float_dtype(dtype):
    """Validate and return floating point type based on `dtype`.

    `dtype` must be a floating point type.

    Args:
      dtype: The data type to validate.

    Returns:
      Validated type.

    Raises:
      ValueError: if `dtype` is not a floating point type.
    """
    dtype = tf.as_dtype(dtype)
    if dtype.is_floating:
        return dtype
    raise ValueError(f"Expected floating point type, got {dtype}.")
1136def _compute_fans(shape):
1137 """Computes the number of input and output units for a weight shape.
1139 Args:
1140 shape: Integer shape tuple or TF tensor shape.
1142 Returns:
1143 A tuple of integer scalars (fan_in, fan_out).
1144 """
1145 if len(shape) < 1: # Just to avoid errors for constants.
1146 fan_in = fan_out = 1
1147 elif len(shape) == 1:
1148 fan_in = fan_out = shape[0]
1149 elif len(shape) == 2:
1150 fan_in = shape[0]
1151 fan_out = shape[1]
1152 else:
1153 # Assuming convolution kernels (2D, 3D, or more).
1154 # kernel shape: (..., input_depth, depth)
1155 receptive_field_size = 1
1156 for dim in shape[:-2]:
1157 receptive_field_size *= dim
1158 fan_in = shape[-2] * receptive_field_size
1159 fan_out = shape[-1] * receptive_field_size
1160 return int(fan_in), int(fan_out)
def _validate_kwargs(cls_name, kwargs, support_partition=True):
    """Reject initializer kwargs outside the supported set.

    Args:
      cls_name: Name of the initializer class, used in error messages.
      kwargs: Mapping of keyword arguments passed to the initializer call.
      support_partition: Whether `partition_shape` / `partition_offset`
        are acceptable for this initializer.

    Raises:
      TypeError: If `kwargs` contains any key outside
        `_ALLOWED_INITIALIZER_KWARGS`.
      ValueError: If partition arguments are passed but not supported.
    """
    unknown = [key for key in kwargs if key not in _ALLOWED_INITIALIZER_KWARGS]
    if unknown:
        raise TypeError(
            f"Unknown keyword arguments: {unknown}. Allowed "
            f"keyword arguments: {_ALLOWED_INITIALIZER_KWARGS}."
        )
    uses_partition = _PARTITION_SHAPE in kwargs or _PARTITION_OFFSET in kwargs
    if uses_partition and not support_partition:
        raise ValueError(
            f"{cls_name} initializer doesn't support "
            "partition-related arguments."
        )
def _ensure_keras_seeded():
    """Make sure the keras.backend global seed generator is set.

    This matters for the DTensor use case: every client must be initialized
    with the same seed for `tf.random.Generator` so that the values created
    stay in sync among all the clients.

    Raises:
      ValueError: If no global seed generator has been configured.
    """
    seed_generator = getattr(backend._SEED_GENERATOR, "generator", None)
    if not seed_generator:
        raise ValueError(
            "When using DTensor APIs, you need to set the global seed "
            "before using any Keras initializers. Please make sure "
            "to call `tf.keras.utils.set_random_seed()` in your code."
        )