# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Embedding layer."""
# pylint: disable=g-classes-have-attributes

from tensorflow.python.keras import backend
from tensorflow.python.keras import constraints
from tensorflow.python.keras import initializers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.engine import base_layer_utils
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.util.tf_export import keras_export


@keras_export('keras.layers.Embedding')
class Embedding(Layer):
  """Turns positive integers (indexes) into dense vectors of fixed size.

  e.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`

  This layer can only be used as the first layer in a model.

  Example:

  >>> model = tf.keras.Sequential()
  >>> model.add(tf.keras.layers.Embedding(1000, 64, input_length=10))
  >>> # The model will take as input an integer matrix of size (batch,
  >>> # input_length), and the largest integer (i.e. word index) in the input
  >>> # should be no larger than 999 (vocabulary size).
  >>> # Now model.output_shape is (None, 10, 64), where `None` is the batch
  >>> # dimension.
  >>> input_array = np.random.randint(1000, size=(32, 10))
  >>> model.compile('rmsprop', 'mse')
  >>> output_array = model.predict(input_array)
  >>> print(output_array.shape)
  (32, 10, 64)

  Args:
    input_dim: Integer. Size of the vocabulary,
      i.e. maximum integer index + 1.
    output_dim: Integer. Dimension of the dense embedding.
    embeddings_initializer: Initializer for the `embeddings`
      matrix (see `keras.initializers`).
    embeddings_regularizer: Regularizer function applied to
      the `embeddings` matrix (see `keras.regularizers`).
    embeddings_constraint: Constraint function applied to
      the `embeddings` matrix (see `keras.constraints`).
    mask_zero: Boolean, whether or not the input value 0 is a special
      "padding" value that should be masked out. This is useful when using
      recurrent layers, which may take variable-length input. If this is
      `True`, then all subsequent layers in the model need to support masking
      or an exception will be raised. As a consequence, index 0 cannot be
      used in the vocabulary (`input_dim` should equal size of
      vocabulary + 1).
    input_length: Length of input sequences, when it is constant.
      This argument is required if you are going to connect
      `Flatten` then `Dense` layers downstream
      (without it, the shape of the dense outputs cannot be computed).

  Input shape:
    2D tensor with shape: `(batch_size, input_length)`.

  Output shape:
    3D tensor with shape: `(batch_size, input_length, output_dim)`.

  **Note on variable placement:**
  By default, if a GPU is available, the embedding matrix will be placed on
  the GPU. This achieves the best performance, but it might cause issues:

  - You may be using an optimizer that does not support sparse GPU kernels.
    In this case you will see an error upon training your model.
  - Your embedding matrix may be too large to fit on your GPU. In this case
    you will see an Out Of Memory (OOM) error.

  In such cases, you should place the embedding matrix on the CPU memory.
  You can do so with a device scope, as such:

  ```python
  with tf.device('cpu:0'):
    embedding_layer = Embedding(...)
    embedding_layer.build()
  ```

  The pre-built `embedding_layer` instance can then be added to a
  `Sequential` model (e.g. `model.add(embedding_layer)`), called in a
  Functional model (e.g. `x = embedding_layer(x)`), or used in a subclassed
  model.
  """

  def __init__(self,
               input_dim,
               output_dim,
               embeddings_initializer='uniform',
               embeddings_regularizer=None,
               activity_regularizer=None,
               embeddings_constraint=None,
               mask_zero=False,
               input_length=None,
               **kwargs):
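    # When the caller does not pass `input_shape` explicitly, derive it from
    # `input_length` (falling back to an unknown sequence length).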
    if 'input_shape' not in kwargs:
      if input_length:
        kwargs['input_shape'] = (input_length,)
      else:
        kwargs['input_shape'] = (None,)
    if input_dim <= 0 or output_dim <= 0:
      raise ValueError('Both `input_dim` and `output_dim` should be positive, '
                       'found input_dim {} and output_dim {}'.format(
                           input_dim, output_dim))
    if (not base_layer_utils.v2_dtype_behavior_enabled() and
        'dtype' not in kwargs):
      # In TF1, the dtype defaults to the input dtype which is typically
      # int32, so explicitly set it to floatx.
      kwargs['dtype'] = backend.floatx()
    # We set autocast to False, as we do not want to cast floating-point
    # inputs to self.dtype. In call(), we cast to int32, and casting to
    # self.dtype before casting to int32 might cause the int32 values to be
    # different due to a loss of precision.
    kwargs['autocast'] = False
    super(Embedding, self).__init__(**kwargs)

    self.input_dim = input_dim
    self.output_dim = output_dim
    self.embeddings_initializer = initializers.get(embeddings_initializer)
    self.embeddings_regularizer = regularizers.get(embeddings_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.embeddings_constraint = constraints.get(embeddings_constraint)
    self.mask_zero = mask_zero
    self.supports_masking = mask_zero
    self.input_length = input_length

  @tf_utils.shape_type_conversion
  def build(self, input_shape=None):
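    # Builds the (input_dim, output_dim) lookup table; e.g. Embedding(1000,
    # 64) creates a 1000 x 64 matrix whose row i holds the vector for index i.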
    self.embeddings = self.add_weight(
        shape=(self.input_dim, self.output_dim),
        initializer=self.embeddings_initializer,
        name='embeddings',
        regularizer=self.embeddings_regularizer,
        constraint=self.embeddings_constraint,
        experimental_autocast=False)
    self.built = True

  def compute_mask(self, inputs, mask=None):
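    # E.g. with mask_zero=True, inputs [[3, 5, 0]] yield the boolean mask
    # [[True, True, False]]; masking-aware downstream layers skip the False
    # (padded) positions.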
    if not self.mask_zero:
      return None
    return math_ops.not_equal(inputs, 0)

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
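    # E.g. input_shape (32, 10) with input_length=10 and output_dim=64 gives
    # (32, 10, 64); when input_length is None, output_dim is simply appended
    # to the input shape.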
    if self.input_length is None:
      return input_shape + (self.output_dim,)
    else:
      # input_length can be tuple if input is 3D or higher
      if isinstance(self.input_length, (list, tuple)):
        in_lens = list(self.input_length)
      else:
        in_lens = [self.input_length]
      if len(in_lens) != len(input_shape) - 1:
        raise ValueError('"input_length" is %s, '
                         'but received input has shape %s' % (str(
                             self.input_length), str(input_shape)))
      else:
        for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])):
          if s1 is not None and s2 is not None and s1 != s2:
            raise ValueError('"input_length" is %s, '
                             'but received input has shape %s' % (str(
                                 self.input_length), str(input_shape)))
          elif s1 is None:
            in_lens[i] = s2
      return (input_shape[0],) + tuple(in_lens) + (self.output_dim,)

  def call(self, inputs):
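    # The lookup is a gather along the first axis of the embedding matrix:
    # out[i, j, :] = embeddings[inputs[i, j], :].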
    dtype = backend.dtype(inputs)
    if dtype != 'int32' and dtype != 'int64':
      inputs = math_ops.cast(inputs, 'int32')
    out = embedding_ops.embedding_lookup_v2(self.embeddings, inputs)
    if self._dtype_policy.compute_dtype != self._dtype_policy.variable_dtype:
      # Instead of casting the variable as in most layers, cast the output,
      # as this is mathematically equivalent but is faster.
      out = math_ops.cast(out, self._dtype_policy.compute_dtype)
    return out

  def get_config(self):
    config = {
        'input_dim': self.input_dim,
        'output_dim': self.output_dim,
        'embeddings_initializer':
            initializers.serialize(self.embeddings_initializer),
        'embeddings_regularizer':
            regularizers.serialize(self.embeddings_regularizer),
        'activity_regularizer':
            regularizers.serialize(self.activity_regularizer),
        'embeddings_constraint':
            constraints.serialize(self.embeddings_constraint),
        'mask_zero': self.mask_zero,
        'input_length': self.input_length
    }
    base_config = super(Embedding, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
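

# A minimal usage sketch, assuming the public `tf.keras` API (which
# re-exports this layer as `tf.keras.layers.Embedding`); it exercises lookup,
# masking, and the resulting output shape.
if __name__ == '__main__':
  import numpy as np
  import tensorflow as tf

  layer = tf.keras.layers.Embedding(input_dim=1000, output_dim=64,
                                    mask_zero=True)
  ids = np.array([[5, 42, 0]])     # 0 acts as the padding index
  vectors = layer(ids)             # shape: (1, 3, 64)
  mask = layer.compute_mask(ids)   # [[True, True, False]]
  print(vectors.shape, mask.numpy())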