Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/layers/preprocessing/preprocessing_utils.py: 22%

72 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Utils for preprocessing layers.""" 

16 

17import numpy as np 

18import tensorflow.compat.v2 as tf 

19 

20from keras.src.utils import tf_utils 

21 

# Output-mode identifiers accepted by `encode_categorical_inputs`.
# "int": inputs are only cast to the requested dtype; no bincount is applied.
INT = "int"
# "one_hot": binary bincount; the input gets a trailing axis unless its last
# dimension is already 1.
ONE_HOT = "one_hot"
# "multi_hot": binary bincount over the last axis.
MULTI_HOT = "multi_hot"
# "count": non-binary bincount over the last axis.
COUNT = "count"
# "tf_idf": non-binary bincount multiplied by the supplied idf weights.
TF_IDF = "tf_idf"

27 

28 

def ensure_tensor(inputs, dtype=None):
    """Returns `inputs` as a Tensor, SparseTensor or RaggedTensor.

    Args:
        inputs: A value that is already one of the three tensor types, or
            anything `tf.convert_to_tensor` accepts.
        dtype: Optional target dtype. When given, it is used for the
            conversion and, if the resulting dtype still differs, for a cast.

    Returns:
        The converted (and possibly cast) tensor.
    """
    tensor_types = (tf.Tensor, tf.RaggedTensor, tf.SparseTensor)
    if not isinstance(inputs, tensor_types):
        inputs = tf.convert_to_tensor(inputs, dtype)
    # Values that were already tensors may still carry a different dtype.
    needs_cast = dtype is not None and inputs.dtype != dtype
    return tf.cast(inputs, dtype) if needs_cast else inputs

36 

37 

def listify_tensors(x):
    """Turns tensors and numpy arrays into lists for config serialization.

    Args:
        x: Any value. Tensors are first converted with `.numpy()`; numpy
            arrays are converted with `.tolist()`.

    Returns:
        A (nested) Python list when `x` was a tensor or numpy array whose
        `.numpy()` result is an `np.ndarray`; otherwise the value unchanged.
    """
    value = x.numpy() if tf.is_tensor(x) else x
    return value.tolist() if isinstance(value, np.ndarray) else value

45 

46 

def sparse_bincount(inputs, depth, binary_output, dtype, count_weights=None):
    """Applies binary or count encoding and returns a `tf.SparseTensor`.

    Args:
        inputs: Values to count; passed straight to `tf.sparse.bincount`.
        depth: Used as both `minlength` and `maxlength` of the bincount and as
            the size of the last axis of the returned dense shape.
        binary_output: Forwarded to `tf.sparse.bincount`.
        dtype: Dtype the bincount result is cast to.
        count_weights: Optional weights forwarded to `tf.sparse.bincount`.

    Returns:
        A `tf.SparseTensor` with dense shape `(depth,)` for rank-1 inputs and
        `(batch_size, depth)` otherwise.
    """
    counts = tf.cast(
        tf.sparse.bincount(
            inputs,
            weights=count_weights,
            minlength=depth,
            maxlength=depth,
            axis=-1,
            binary_output=binary_output,
        ),
        dtype,
    )
    if inputs.shape.rank == 1:
        dense_shape = (depth,)
    else:
        # The leading dimension is read dynamically from the bincount result.
        dense_shape = (tf.shape(counts)[0], depth)
    # Rebuild the sparse tensor so its dense shape is pinned to `depth`.
    return tf.SparseTensor(
        indices=counts.indices,
        values=counts.values,
        dense_shape=dense_shape,
    )

67 

68 

def dense_bincount(inputs, depth, binary_output, dtype, count_weights=None):
    """Applies binary or count encoding and returns the bincount result.

    Args:
        inputs: Values to count; passed straight to `tf.math.bincount`.
        depth: Used as both `minlength` and `maxlength` of the bincount and as
            the size of the last axis of the static output shape.
        binary_output: Forwarded to `tf.math.bincount`.
        dtype: Output dtype requested from `tf.math.bincount`.
        count_weights: Optional weights forwarded to `tf.math.bincount`.

    Returns:
        The bincount result with its static shape set to `(depth,)` for rank-1
        inputs and `(batch_size, depth)` otherwise.
    """
    counts = tf.math.bincount(
        inputs,
        weights=count_weights,
        minlength=depth,
        maxlength=depth,
        dtype=dtype,
        axis=-1,
        binary_output=binary_output,
    )
    if inputs.shape.rank == 1:
        static_shape = (depth,)
    else:
        # Reuse the static leading dimension of the input (may be None).
        static_shape = (inputs.shape.as_list()[0], depth)
    counts.set_shape(tf.TensorShape(static_shape))
    return counts

86 

87 

def expand_dims(inputs, axis):
    """Inserts a dimension of size 1 at `axis`.

    Inputs that `tf_utils.is_sparse` reports as sparse go through
    `tf.sparse.expand_dims`; every other input goes through `tf.expand_dims`.

    Args:
        inputs: The tensor to expand.
        axis: Index at which the new dimension is inserted.

    Returns:
        The expanded tensor.
    """
    if tf_utils.is_sparse(inputs):
        expand_fn = tf.sparse.expand_dims
    else:
        expand_fn = tf.expand_dims
    return expand_fn(inputs, axis)

94 

95 

def encode_categorical_inputs(
    inputs,
    output_mode,
    depth,
    dtype="float32",
    sparse=False,
    count_weights=None,
    idf_weights=None,
):
    """Encodes categorical inputs according to `output_mode`.

    Args:
        inputs: The values to encode. In `"int"` mode they are only cast to
            `dtype`; otherwise they are handed to the bincount helpers.
        output_mode: One of the module constants `INT`, `ONE_HOT`,
            `MULTI_HOT`, `COUNT` or `TF_IDF`.
        depth: Size of the encoded axis (used as the bincount min/max length).
        dtype: Dtype of the result. Defaults to `"float32"`.
        sparse: If true, encode with `sparse_bincount` and return a
            `tf.SparseTensor`; otherwise encode with `dense_bincount`.
        count_weights: Optional weights forwarded to the bincount helper.
        idf_weights: Weights multiplied into the counts in `"tf_idf"` mode.

    Returns:
        The cast inputs in `"int"` mode, otherwise the (optionally idf
        weighted) bincount encoding.

    Raises:
        ValueError: If the upranked input has rank greater than 2 in a
            non-`"int"` mode, or if `output_mode` is `"tf_idf"` and
            `idf_weights` is None.
    """
    if output_mode == INT:
        return tf.identity(tf.cast(inputs, dtype))

    original_shape = inputs.shape
    # Scalars are always upranked so they are treated as a single sample.
    if inputs.shape.rank == 0:
        inputs = expand_dims(inputs, -1)
    # One-hot upranks only if the final dimension is not already 1.
    if output_mode == ONE_HOT and inputs.shape[-1] != 1:
        inputs = expand_dims(inputs, -1)

    # TODO(b/190445202): remove output rank restriction.
    if inputs.shape.rank > 2:
        raise ValueError(
            "When output_mode is not `'int'`, maximum supported output rank "
            f"is 2. Received output_mode {output_mode} and input shape "
            f"{original_shape}, "
            f"which would result in output rank {inputs.shape.rank}."
        )

    binary_output = output_mode in (MULTI_HOT, ONE_HOT)
    bincount_fn = sparse_bincount if sparse else dense_bincount
    bincounts = bincount_fn(inputs, depth, binary_output, dtype, count_weights)

    if output_mode != TF_IDF:
        return bincounts

    if idf_weights is None:
        raise ValueError(
            "When output mode is `'tf_idf'`, idf_weights must be provided. "
            f"Received: output_mode={output_mode} and idf_weights={idf_weights}"
        )

    if not sparse:
        return tf.multiply(bincounts, idf_weights)

    # For sparse output, look up the idf weight of each stored entry through
    # the last column of its index and scale only the stored values.
    value_weights = tf.gather(idf_weights, bincounts.indices[:, -1])
    return tf.SparseTensor(
        bincounts.indices,
        value_weights * bincounts.values,
        bincounts.dense_shape,
    )

155 

156 

def compute_shape_for_encode_categorical(shape, output_mode, depth):
    """Computes the output shape of `encode_categorical_inputs`.

    Args:
        shape: Input shape as a list, tuple or `tf.TensorShape`. An empty or
            None shape is treated as a scalar input.
        output_mode: One of the module constants `INT`, `ONE_HOT`,
            `MULTI_HOT`, `COUNT` or `TF_IDF`.
        depth: Size of the encoded axis.

    Returns:
        A `tf.TensorShape`: the input shape itself in `"int"` mode, `[depth]`
        for an empty shape, the input shape with `depth` appended for one-hot
        inputs whose last dimension is not 1, and otherwise the input shape
        with its last dimension replaced by `depth`.
    """
    if output_mode == INT:
        return tf.TensorShape(shape)
    if not shape:
        return tf.TensorShape([depth])
    # A tuple cannot be concatenated with the `[depth]` list below, so
    # normalize it to a list first; other shape types are left untouched.
    if isinstance(shape, tuple):
        shape = list(shape)
    if output_mode == ONE_HOT and shape[-1] != 1:
        return tf.TensorShape(shape + [depth])
    return tf.TensorShape(shape[:-1] + [depth])

167