Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/layers/preprocessing/preprocessing_utils.py: 22%
72 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Utils for preprocessing layers."""
17import numpy as np
18import tensorflow.compat.v2 as tf
20from keras.src.utils import tf_utils
# Output modes accepted by `encode_categorical_inputs`.

# Pass the indices through unchanged apart from a cast to the target dtype.
INT = "int"
# Binary encoding; a trailing axis is added first unless the last dimension
# is already 1.
ONE_HOT = "one_hot"
# Binary encoding computed over the last axis of the input as given.
MULTI_HOT = "multi_hot"
# Per-index occurrence counts (optionally weighted by `count_weights`).
COUNT = "count"
# Occurrence counts multiplied by the caller-supplied `idf_weights`.
TF_IDF = "tf_idf"
def ensure_tensor(inputs, dtype=None):
    """Return `inputs` as a Tensor, SparseTensor or RaggedTensor.

    Args:
        inputs: Value to normalise. Instances of `tf.Tensor`,
            `tf.RaggedTensor` and `tf.SparseTensor` are kept as they are;
            anything else goes through `tf.convert_to_tensor`.
        dtype: Optional target dtype. When given, the result is cast to it
            if its dtype differs.

    Returns:
        The (possibly converted and cast) tensor.
    """
    tensor_types = (tf.Tensor, tf.RaggedTensor, tf.SparseTensor)
    if isinstance(inputs, tensor_types):
        tensor = inputs
    else:
        tensor = tf.convert_to_tensor(inputs, dtype)

    # Values that were already tensors may still carry a different dtype.
    needs_cast = dtype is not None and tensor.dtype != dtype
    return tf.cast(tensor, dtype) if needs_cast else tensor
def listify_tensors(x):
    """Turn a tensor or numpy array into plain lists for config serialization.

    Args:
        x: Any value. Tensors are first materialised with `.numpy()`; numpy
            arrays are then converted with `.tolist()`.

    Returns:
        The `.tolist()` result when the (materialised) value is a
        `np.ndarray`; otherwise the value itself.
    """
    value = x.numpy() if tf.is_tensor(x) else x
    return value.tolist() if isinstance(value, np.ndarray) else value
def sparse_bincount(inputs, depth, binary_output, dtype, count_weights=None):
    """Bincount `inputs` over the last axis and return a `tf.SparseTensor`.

    Args:
        inputs: Tensor of indices to count.
        depth: Used as both `minlength` and `maxlength` of the bincount, and
            as the size of the last output dimension.
        binary_output: Forwarded to `tf.sparse.bincount`; selects binary
            rather than count encoding.
        dtype: Dtype the bincount result is cast to.
        count_weights: Optional weights forwarded to `tf.sparse.bincount`.

    Returns:
        A `tf.SparseTensor` whose dense shape is `(depth,)` for rank-1
        inputs and `(batch_size, depth)` otherwise.
    """
    counts = tf.cast(
        tf.sparse.bincount(
            inputs,
            weights=count_weights,
            minlength=depth,
            maxlength=depth,
            axis=-1,
            binary_output=binary_output,
        ),
        dtype,
    )

    if inputs.shape.rank == 1:
        dense_shape = (depth,)
    else:
        # The leading dimension is read from the result at run time.
        dense_shape = (tf.shape(counts)[0], depth)

    # Rebuild the sparse tensor so its dense shape has `depth` as last dim.
    return tf.SparseTensor(
        indices=counts.indices,
        values=counts.values,
        dense_shape=dense_shape,
    )
def dense_bincount(inputs, depth, binary_output, dtype, count_weights=None):
    """Bincount `inputs` over the last axis and return a dense tensor.

    Args:
        inputs: Tensor of indices to count.
        depth: Used as both `minlength` and `maxlength` of the bincount, and
            as the size of the last output dimension.
        binary_output: Forwarded to `tf.math.bincount`; selects binary
            rather than count encoding.
        dtype: Dtype of the bincount result.
        count_weights: Optional weights forwarded to `tf.math.bincount`.

    Returns:
        The bincount tensor with its static shape set to `(depth,)` for
        rank-1 inputs, or `(batch_size, depth)` otherwise.
    """
    counts = tf.math.bincount(
        inputs,
        weights=count_weights,
        minlength=depth,
        maxlength=depth,
        dtype=dtype,
        axis=-1,
        binary_output=binary_output,
    )

    if inputs.shape.rank == 1:
        static_shape = (depth,)
    else:
        # Static leading dimension of the input; None when it is unknown.
        static_shape = (inputs.shape.as_list()[0], depth)

    counts.set_shape(tf.TensorShape(static_shape))
    return counts
def expand_dims(inputs, axis):
    """Insert a dimension of size 1 at `axis`.

    Sparse inputs (as reported by `tf_utils.is_sparse`) go through
    `tf.sparse.expand_dims`; everything else goes through `tf.expand_dims`.
    """
    if tf_utils.is_sparse(inputs):
        expand_fn = tf.sparse.expand_dims
    else:
        expand_fn = tf.expand_dims
    return expand_fn(inputs, axis)
def encode_categorical_inputs(
    inputs,
    output_mode,
    depth,
    dtype="float32",
    sparse=False,
    count_weights=None,
    idf_weights=None,
):
    """Encode categorical inputs according to `output_mode`.

    Args:
        inputs: Tensor of category indices.
        output_mode: One of `INT`, `ONE_HOT`, `MULTI_HOT`, `COUNT` or
            `TF_IDF`.
        depth: Size of the encoded (last) output dimension.
        dtype: Dtype of the encoded output.
        sparse: If truthy, encode with `sparse_bincount` and return a
            `tf.SparseTensor`; otherwise use `dense_bincount`.
        count_weights: Optional weights forwarded to the bincount helper.
        idf_weights: Weights applied to the counts in `TF_IDF` mode.

    Returns:
        For `INT`, the inputs cast to `dtype`. Otherwise the bincount
        encoding, multiplied by `idf_weights` in `TF_IDF` mode.

    Raises:
        ValueError: If the (possibly upranked) input has rank greater than 2
            in a non-`INT` mode, or if `output_mode` is `TF_IDF` and
            `idf_weights` is None.
    """
    if output_mode == INT:
        return tf.identity(tf.cast(inputs, dtype))

    original_shape = inputs.shape

    # A scalar is always treated as a single sample.
    if inputs.shape.rank == 0:
        inputs = expand_dims(inputs, -1)
    # One-hot adds a trailing axis unless the last dimension is already 1.
    if output_mode == ONE_HOT and inputs.shape[-1] != 1:
        inputs = expand_dims(inputs, -1)

    # TODO(b/190445202): remove output rank restriction.
    if inputs.shape.rank > 2:
        raise ValueError(
            "When output_mode is not `'int'`, maximum supported output rank "
            f"is 2. Received output_mode {output_mode} and input shape "
            f"{original_shape}, "
            f"which would result in output rank {inputs.shape.rank}."
        )

    bincount_fn = sparse_bincount if sparse else dense_bincount
    binary_output = output_mode in (MULTI_HOT, ONE_HOT)
    bincounts = bincount_fn(
        inputs, depth, binary_output, dtype, count_weights
    )

    if output_mode != TF_IDF:
        return bincounts

    if idf_weights is None:
        raise ValueError(
            "When output mode is `'tf_idf'`, idf_weights must be provided. "
            f"Received: output_mode={output_mode} and idf_weights={idf_weights}"
        )

    if not sparse:
        return tf.multiply(bincounts, idf_weights)

    # Sparse case: look up one idf weight per stored value, keyed by the
    # value's index along the last axis.
    value_weights = tf.gather(idf_weights, bincounts.indices[:, -1])
    return tf.SparseTensor(
        bincounts.indices,
        value_weights * bincounts.values,
        bincounts.dense_shape,
    )
def compute_shape_for_encode_categorical(shape, output_mode, depth):
    """Compute the output shape of `encode_categorical_inputs`.

    Args:
        shape: Input shape as a list, tuple or `tf.TensorShape`. A falsy
            value (e.g. `None` or an empty list) is treated as a scalar
            input.
        output_mode: One of the output-mode constants defined in this
            module.
        depth: Size of the encoded (last) output dimension.

    Returns:
        A `tf.TensorShape`: the input shape itself for `INT`; `[depth]` for
        scalar inputs; the input shape with `depth` appended for `ONE_HOT`
        when the last dimension is not 1; otherwise the input shape with its
        last dimension replaced by `depth`.
    """
    if output_mode == INT:
        return tf.TensorShape(shape)
    if not shape:
        return tf.TensorShape([depth])

    # Normalise to a list so tuples (which cannot be concatenated with a
    # list) are handled the same way as lists.
    dims = list(shape)
    # A shape object may be truthy yet hold no dimensions (presumably the
    # case for a rank-0 `tf.TensorShape` -- verify against the TF version in
    # use); treat it as a scalar rather than indexing into an empty list.
    if not dims:
        return tf.TensorShape([depth])

    if output_mode == ONE_HOT and dims[-1] != 1:
        return tf.TensorShape(dims + [depth])
    return tf.TensorShape(dims[:-1] + [depth])