Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/layers/kernelized.py: 23%

73 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15 

16"""Keras layers that implement explicit (approximate) kernel feature maps.""" 

17 

18import numpy as np 

19import tensorflow.compat.v2 as tf 

20 

21from keras.src import initializers 

22from keras.src.engine import base_layer 

23from keras.src.engine import input_spec 

24 

25# isort: off 

26from tensorflow.python.util.tf_export import keras_export 

27 

# String identifiers accepted for `kernel_initializer`. Callers lowercase the
# user-supplied value before checking membership in this list.
_SUPPORTED_RBF_KERNEL_TYPES = [
    "gaussian",
    "laplacian",
]

29 

30 

@keras_export("keras.layers.experimental.RandomFourierFeatures")
class RandomFourierFeatures(base_layer.Layer):
    r"""Layer that projects its inputs into a random feature space.

    This layer implements a mapping from input space to a space with
    `output_dim` dimensions, which approximates shift-invariant kernels. A
    kernel function `K(x, y)` is shift-invariant if `K(x, y) == k(x - y)` for
    some function `k`. Many popular Radial Basis Functions (RBF), including
    Gaussian and Laplacian kernels, are shift-invariant.

    The implementation of this layer is based on the following paper:
    ["Random Features for Large-Scale Kernel Machines"](
      https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)
    by Ali Rahimi and Ben Recht.

    The distribution from which the parameters of the random features map
    (layer) are sampled determines which shift-invariant kernel the layer
    approximates (see paper for more details). You can use the distribution of
    your choice. The layer supports out-of-the-box approximations of the
    following two RBF kernels:

    - Gaussian: `K(x, y) == exp(- square(x - y) / (2 * square(scale)))`
    - Laplacian: `K(x, y) == exp(-abs(x - y) / scale)`

    **Note:** Unlike what is described in the paper and unlike what is used in
    the Scikit-Learn implementation, the output of this layer does not apply
    the `sqrt(2 / D)` normalization factor.

    **Usage:** Typically, this layer is used to "kernelize" linear models by
    applying a non-linear transformation (this layer) to the input features and
    then training a linear model on top of the transformed features. Depending
    on the loss function of the linear model, the composition of this layer and
    the linear model results in models that are equivalent (up to
    approximation) to kernel SVMs (for hinge loss), kernel logistic regression
    (for logistic loss), kernel linear regression (for squared loss), etc.

    Examples:

    A kernel multinomial logistic regression model with Gaussian kernel for
    MNIST:

    ```python
    model = keras.Sequential([
      keras.Input(shape=(784,)),
      RandomFourierFeatures(
          output_dim=4096,
          scale=10.,
          kernel_initializer='gaussian'),
      layers.Dense(units=10, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy']
    )
    ```

    A quasi-SVM classifier for MNIST:

    ```python
    model = keras.Sequential([
      keras.Input(shape=(784,)),
      RandomFourierFeatures(
          output_dim=4096,
          scale=10.,
          kernel_initializer='gaussian'),
      layers.Dense(units=10),
    ])
    model.compile(
        optimizer='adam',
        loss='hinge',
        metrics=['categorical_accuracy']
    )
    ```

    To use another kernel, just replace the layer creation line with:

    ```python
    random_features_layer = RandomFourierFeatures(
        output_dim=500,
        kernel_initializer=<my_initializer>,
        scale=...,
        ...)
    ```

    Args:
      output_dim: Positive integer, the dimension of the layer's output, i.e.,
        the number of random features used to approximate the kernel.
      kernel_initializer: Determines the distribution of the parameters of the
        random features map (and therefore the kernel approximated by the
        layer). It can be either a string identifier or a Keras `Initializer`
        instance. Currently only 'gaussian' and 'laplacian' are supported
        string identifiers (case insensitive). Note that the kernel matrix is
        not trainable.
      scale: For Gaussian and Laplacian kernels, this corresponds to a scaling
        factor of the corresponding kernel approximated by the layer (see
        concrete definitions above). When provided, it should be a positive
        float. If None, a default value is used: if the kernel initializer is
        set to "gaussian", `scale` becomes `sqrt(input_dim / 2)`, otherwise,
        it becomes 1.0. Both the approximation error of the kernel and the
        classification quality are sensitive to this parameter. If `trainable`
        is set to `True`, this parameter is learned end-to-end during training
        and the provided value serves as the initial value.
        **Note:** When features from this layer are fed to a linear model,
          by making `scale` trainable, the resulting optimization problem is
          no longer convex (even if the loss function used by the linear model
          is convex).
        Defaults to `None`.
      trainable: Whether the scaling parameter of the layer should be trainable.
        Defaults to `False`.
      name: String, name to use for this layer.
    """

    def __init__(
        self,
        output_dim,
        kernel_initializer="gaussian",
        scale=None,
        trainable=False,
        name=None,
        **kwargs,
    ):
        """Validates the constructor arguments and stores them on the layer.

        Raises:
          ValueError: If `output_dim` is not positive, if `kernel_initializer`
            is a string other than the supported identifiers, or if `scale`
            is provided and is not positive.
        """
        if output_dim <= 0:
            raise ValueError(
                "`output_dim` should be a positive integer. "
                f"Received: {output_dim}"
            )
        if isinstance(kernel_initializer, str):
            # String identifiers are matched case-insensitively.
            if kernel_initializer.lower() not in _SUPPORTED_RBF_KERNEL_TYPES:
                raise ValueError(
                    f"Unsupported `kernel_initializer`: {kernel_initializer} "
                    f"Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}"
                )
        if scale is not None and scale <= 0.0:
            raise ValueError(
                "When provided, `scale` should be a positive float. "
                f"Received: {scale}"
            )
        super().__init__(trainable=trainable, name=name, **kwargs)
        self.output_dim = output_dim
        self.kernel_initializer = kernel_initializer
        self.scale = scale

    def build(self, input_shape):
        """Creates the layer weights for a rank-2 input shape.

        Adds three float32 weights: the frozen `unscaled_kernel` of shape
        `(input_dim, output_dim)`, the frozen `bias` of shape `(output_dim,)`
        drawn uniformly from `[0, 2 * pi)`, and `kernel_scale` of shape
        `(1,)`, initialized from `scale`.

        Raises:
          ValueError: If the input rank is not 2 or if the last input
            dimension is undefined.
        """
        input_shape = tf.TensorShape(input_shape)
        # TODO(pmol): Allow higher dimension inputs. Currently the input is
        # expected to have shape [batch_size, dimension].
        if input_shape.rank != 2:
            raise ValueError(
                "The rank of the input tensor should be 2. "
                f"Received input with rank {input_shape.rank} instead. "
                f"Full input shape received: {input_shape}"
            )
        input_dim = input_shape.dims[1].value
        if input_dim is None:
            raise ValueError(
                "The last dimension of the input tensor should be defined. "
                f"Found `None`. Full input shape received: {input_shape}"
            )
        self.input_spec = input_spec.InputSpec(ndim=2, axes={1: input_dim})

        kernel_initializer = _get_random_features_initializer(
            self.kernel_initializer, shape=(input_dim, self.output_dim)
        )

        self.unscaled_kernel = self.add_weight(
            name="unscaled_kernel",
            shape=(input_dim, self.output_dim),
            dtype=tf.float32,
            initializer=kernel_initializer,
            trainable=False,
        )

        self.bias = self.add_weight(
            name="bias",
            shape=(self.output_dim,),
            dtype=tf.float32,
            initializer=initializers.RandomUniform(
                minval=0.0, maxval=2 * np.pi
            ),
            trainable=False,
        )

        # Resolve the default lazily: it depends on the input dimension, which
        # is only known at build time. The resolved value replaces
        # `self.scale`, so `get_config` reports it afterwards.
        if self.scale is None:
            self.scale = _get_default_scale(self.kernel_initializer, input_dim)
        self.kernel_scale = self.add_weight(
            name="kernel_scale",
            shape=(1,),
            dtype=tf.float32,
            initializer=tf.compat.v1.constant_initializer(self.scale),
            trainable=True,
            constraint="NonNeg",
        )
        super().build(input_shape)

    def call(self, inputs):
        """Returns `cos(inputs @ (unscaled_kernel / kernel_scale) + bias)`.

        The computation is carried out in float32, matching the dtype of the
        layer weights.
        """
        # Only non-tensor inputs go through `convert_to_tensor` with an
        # explicit dtype. Requesting a dtype for an existing tensor of another
        # dtype (e.g. integer features) is expected to raise rather than cast,
        # which would make the cast below unreachable.
        if not tf.is_tensor(inputs):
            inputs = tf.convert_to_tensor(inputs, dtype=self.dtype)
        inputs = tf.cast(inputs, tf.float32)
        kernel = (1.0 / self.kernel_scale) * self.unscaled_kernel
        outputs = tf.matmul(a=inputs, b=kernel)
        outputs = tf.nn.bias_add(outputs, self.bias)
        return tf.cos(outputs)

    def compute_output_shape(self, input_shape):
        """Returns the input shape with its last dimension set to `output_dim`.

        The input shape is first constrained to rank 2 via `with_rank(2)`.

        Raises:
          ValueError: If the last input dimension is undefined.
        """
        input_shape = tf.TensorShape(input_shape)
        input_shape = input_shape.with_rank(2)
        if input_shape.dims[-1].value is None:
            raise ValueError(
                "The last dimension of the input tensor should be defined. "
                f"Found `None`. Full input shape received: {input_shape}"
            )
        return input_shape[:-1].concatenate(self.output_dim)

    def get_config(self):
        """Returns the layer configuration as a dict.

        String kernel initializers are stored as-is; any other initializer is
        passed through `initializers.serialize`. `scale` is whatever is
        currently stored on the layer, i.e. the resolved default once `build`
        has run with `scale=None`.
        """
        kernel_initializer = self.kernel_initializer
        if not isinstance(kernel_initializer, str):
            kernel_initializer = initializers.serialize(kernel_initializer)
        config = {
            "output_dim": self.output_dim,
            "kernel_initializer": kernel_initializer,
            "scale": self.scale,
        }
        base_config = super().get_config()
        # Layer-specific entries take precedence over the base ones.
        return {**base_config, **config}

257 

258 

def _get_random_features_initializer(initializer, shape):
    """Resolves `initializer` to the object used to fill the random kernel.

    Args:
      initializer: Either a string identifier ("gaussian" or "laplacian",
        compared case-insensitively) or any other object, which is handed
        back untouched.
      shape: Shape of the kernel; only used to draw the Cauchy samples for
        the "laplacian" identifier.

    Returns:
      `initializer` itself when it is not a string; otherwise a
      `RandomNormal(stddev=1.0)` initializer for "gaussian" or a `Constant`
      initializer wrapping freshly drawn Cauchy samples for "laplacian".

    Raises:
      ValueError: If `initializer` is a string that is not supported.
    """
    # Non-string initializers are assumed to be ready to use.
    if not isinstance(initializer, str):
        return initializer

    kernel_type = initializer.lower()

    if kernel_type == "gaussian":
        return initializers.RandomNormal(stddev=1.0)

    if kernel_type == "laplacian":
        # Cauchy samples via the inverse CDF applied to uniform draws.
        loc, scale = 0.0, 1.0
        uniform_draws = np.random.uniform(low=0.0, high=1.0, size=shape)
        cauchy_samples = loc + scale * np.tan(np.pi * (uniform_draws - 0.5))
        return initializers.Constant(cauchy_samples)

    raise ValueError(
        f'Unsupported `kernel_initializer`: "{initializer}" '
        f"Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}"
    )

281 

282 

def _get_default_scale(initializer, input_dim):
    """Returns the default kernel scale for the given initializer.

    Args:
      initializer: The kernel initializer; only the string "gaussian"
        (compared case-insensitively) selects a dimension-dependent default.
      input_dim: Size of the last input dimension.

    Returns:
      `sqrt(input_dim / 2)` for the "gaussian" identifier, otherwise `1.0`.
      Both branches return a built-in `float`, not a NumPy scalar, so the
      value has the same type regardless of which branch produced it.
    """
    if isinstance(initializer, str) and initializer.lower() == "gaussian":
        # `np.sqrt` yields `np.float64`; normalise it to a plain float.
        return float(np.sqrt(input_dim / 2.0))
    return 1.0

287