Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/layers/preprocessing/preprocessing

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14# ==============================================================================

15"""Utils for preprocessing layers."""

17import numpy as np

18import tensorflow.compat.v2 as tf

20from keras.src.utils import tf_utils

# Names of the supported `output_mode` values. `encode_categorical_inputs`
# and `compute_shape_for_encode_categorical` compare against these.
INT = "int"  # Pass indices through (cast only), no bincount encoding.
ONE_HOT = "one_hot"  # Binary encoding; may add a trailing axis first.
MULTI_HOT = "multi_hot"  # Binary encoding over the last axis.
COUNT = "count"  # Per-bin occurrence counts over the last axis.
TF_IDF = "tf_idf"  # Counts scaled by caller-supplied idf weights.

def ensure_tensor(inputs, dtype=None):
    """Return `inputs` as a Tensor, SparseTensor or RaggedTensor.

    Args:
        inputs: A `tf.Tensor`, `tf.RaggedTensor`, `tf.SparseTensor`, or any
            value that `tf.convert_to_tensor` accepts.
        dtype: Optional target dtype. When `None`, no cast is performed.

    Returns:
        `inputs` itself when it is already one of the three tensor types and
        needs no cast; otherwise the converted and/or cast tensor.
    """
    tensor_types = (tf.Tensor, tf.RaggedTensor, tf.SparseTensor)
    if isinstance(inputs, tensor_types):
        tensor = inputs
    else:
        tensor = tf.convert_to_tensor(inputs, dtype)

    # Existing tensors are never converted above, so their dtype may still
    # differ from the requested one.
    needs_cast = dtype is not None and tensor.dtype != dtype
    return tf.cast(tensor, dtype) if needs_cast else tensor

def listify_tensors(x):
    """Turn a tensor or numpy array into a plain list for config serialization.

    Args:
        x: Any value. Tensors (per `tf.is_tensor`) are first converted with
            `.numpy()`; numpy arrays are then converted with `.tolist()`.

    Returns:
        The list form of `x` if it was a tensor or `np.ndarray`, otherwise
        `x` unchanged.
    """
    # NOTE(review): `.numpy()` presumably requires an eager tensor -- confirm
    # that callers never pass symbolic tensors.
    value = x.numpy() if tf.is_tensor(x) else x
    return value.tolist() if isinstance(value, np.ndarray) else value

def sparse_bincount(inputs, depth, binary_output, dtype, count_weights=None):
    """Bincount `inputs` over the last axis and return a `tf.SparseTensor`.

    Args:
        inputs: Tensor of integer indices to count.
        depth: Number of bins; used as both `minlength` and `maxlength`.
        binary_output: Forwarded to `tf.sparse.bincount`; selects binary
            rather than count encoding.
        dtype: dtype the counted values are cast to.
        count_weights: Optional weights forwarded to `tf.sparse.bincount`.

    Returns:
        A `tf.SparseTensor` with dense shape `(depth,)` for rank-1 inputs and
        `(batch_size, depth)` otherwise.
    """
    counts = tf.cast(
        tf.sparse.bincount(
            inputs,
            weights=count_weights,
            minlength=depth,
            maxlength=depth,
            axis=-1,
            binary_output=binary_output,
        ),
        dtype,
    )

    # Rebuild the sparse tensor with an explicit dense shape whose last
    # dimension is exactly `depth`.
    if inputs.shape.rank == 1:
        dense_shape = (depth,)
    else:
        dense_shape = (tf.shape(counts)[0], depth)

    return tf.SparseTensor(
        indices=counts.indices,
        values=counts.values,
        dense_shape=dense_shape,
    )

def dense_bincount(inputs, depth, binary_output, dtype, count_weights=None):
    """Bincount `inputs` over the last axis and return a dense tensor.

    Args:
        inputs: Tensor of integer indices to count.
        depth: Number of bins; used as both `minlength` and `maxlength`.
        binary_output: Forwarded to `tf.math.bincount`; selects binary
            rather than count encoding.
        dtype: Output dtype passed to `tf.math.bincount`.
        count_weights: Optional weights forwarded to `tf.math.bincount`.

    Returns:
        The bincount result, with its static shape set to `(depth,)` for
        rank-1 inputs and `(batch_size, depth)` otherwise.
    """
    counts = tf.math.bincount(
        inputs,
        weights=count_weights,
        minlength=depth,
        maxlength=depth,
        dtype=dtype,
        axis=-1,
        binary_output=binary_output,
    )

    # Pin the static shape; the batch dimension is taken from the input's
    # static shape.
    if inputs.shape.rank == 1:
        static_dims = (depth,)
    else:
        static_dims = (inputs.shape.as_list()[0], depth)
    counts.set_shape(tf.TensorShape(static_dims))
    return counts

def expand_dims(inputs, axis):
    """Insert a size-1 dimension at `axis` for sparse, ragged or dense input.

    Args:
        inputs: The tensor to expand.
        axis: Position at which the new dimension is inserted.

    Returns:
        The result of `tf.sparse.expand_dims` when `tf_utils.is_sparse`
        reports a sparse input, otherwise the result of `tf.expand_dims`.
    """
    if tf_utils.is_sparse(inputs):
        expand_fn = tf.sparse.expand_dims
    else:
        expand_fn = tf.expand_dims
    return expand_fn(inputs, axis)

def encode_categorical_inputs(
    inputs,
    output_mode,
    depth,
    dtype="float32",
    sparse=False,
    count_weights=None,
    idf_weights=None,
):
    """Encode categorical inputs according to `output_mode`.

    Args:
        inputs: Tensor of integer category indices.
        output_mode: One of `INT`, `ONE_HOT`, `MULTI_HOT`, `COUNT` or
            `TF_IDF`.
        depth: Number of bins in the encoded output.
        dtype: dtype of the returned tensor.
        sparse: When truthy, encode with `sparse_bincount` and return a
            `tf.SparseTensor`; otherwise encode with `dense_bincount`.
        count_weights: Optional weights forwarded to the bincount helper.
        idf_weights: Per-bin weights; required when `output_mode` is
            `TF_IDF`.

    Returns:
        For `INT`, `inputs` cast to `dtype`. Otherwise the bincount encoding,
        multiplied by `idf_weights` when `output_mode` is `TF_IDF`.

    Raises:
        ValueError: If the (possibly upranked) input has rank greater than 2,
            or if `output_mode` is `TF_IDF` and `idf_weights` is `None`.
    """
    if output_mode == INT:
        return tf.identity(tf.cast(inputs, dtype))

    input_shape = inputs.shape

    # A scalar is always treated as a single sample.
    if input_shape.rank == 0:
        inputs = expand_dims(inputs, -1)

    # One-hot upranks only when the last dimension is not already 1.
    if output_mode == ONE_HOT and inputs.shape[-1] != 1:
        inputs = expand_dims(inputs, -1)

    # TODO(b/190445202): remove output rank restriction.
    if inputs.shape.rank > 2:
        raise ValueError(
            "When output_mode is not `'int'`, maximum supported output rank "
            f"is 2. Received output_mode {output_mode} and input shape "
            f"{input_shape}, "
            f"which would result in output rank {inputs.shape.rank}."
        )

    bincount_fn = sparse_bincount if sparse else dense_bincount
    is_binary = output_mode in (MULTI_HOT, ONE_HOT)
    bincounts = bincount_fn(inputs, depth, is_binary, dtype, count_weights)

    if output_mode != TF_IDF:
        return bincounts

    if idf_weights is None:
        raise ValueError(
            "When output mode is `'tf_idf'`, idf_weights must be provided. "
            f"Received: output_mode={output_mode} and idf_weights={idf_weights}"
        )

    if not sparse:
        return tf.multiply(bincounts, idf_weights)

    # For sparse output, look up the idf weight of each stored value by its
    # last index (the bin id) and scale the stored values only.
    bin_ids = bincounts.indices[:, -1]
    value_weights = tf.gather(idf_weights, bin_ids)
    return tf.SparseTensor(
        bincounts.indices,
        value_weights * bincounts.values,
        bincounts.dense_shape,
    )

155

156

def compute_shape_for_encode_categorical(shape, output_mode, depth):
    """Compute the output shape of `encode_categorical_inputs`.

    Args:
        shape: Input shape as a list, tuple or `tf.TensorShape`. An empty or
            `None` shape is treated as a scalar input.
        output_mode: One of the module's output-mode constants.
        depth: Size of the encoded (last) output dimension.

    Returns:
        A `tf.TensorShape`: `shape` itself for `INT`; `[depth]` for scalar
        input; `shape + [depth]` for `ONE_HOT` when the last dimension is not
        1; otherwise `shape` with its last dimension replaced by `depth`.
    """
    if output_mode == INT:
        return tf.TensorShape(shape)
    if not shape:
        return tf.TensorShape([depth])

    # Normalize to a list so tuple shapes can be concatenated with `[depth]`
    # (tuple + list raises TypeError).
    dims = list(shape)
    # A rank-0 `tf.TensorShape` passes the check above but has no dimensions;
    # handle it like any other scalar instead of failing on `dims[-1]`.
    if not dims:
        return tf.TensorShape([depth])

    if output_mode == ONE_HOT and dims[-1] != 1:
        return tf.TensorShape(dims + [depth])
    return tf.TensorShape(dims[:-1] + [depth])

167

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/layers/preprocessing/preprocessing_utils.py: 22%

72 statements