Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/layers/kernelized.py: 23%

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14# ==============================================================================

16"""Keras layers that implement explicit (approximate) kernel feature maps."""

18import numpy as np

19import tensorflow.compat.v2 as tf

21from keras.src import initializers

22from keras.src.engine import base_layer

23from keras.src.engine import input_spec

25# isort: off

26from tensorflow.python.util.tf_export import keras_export

28_SUPPORTED_RBF_KERNEL_TYPES = ["gaussian", "laplacian"]

@keras_export("keras.layers.experimental.RandomFourierFeatures")
class RandomFourierFeatures(base_layer.Layer):
    r"""Layer that maps its inputs into a randomized feature space.

    The layer maps a rank-2 input of shape `[batch_size, input_dim]` to an
    output with `output_dim` features. The mapping approximates a
    shift-invariant kernel, i.e. a kernel `K(x, y)` that can be written as
    `K(x, y) == k(x - y)` for some function `k`. Many popular Radial Basis
    Functions (RBF), including the Gaussian and Laplacian kernels, are
    shift-invariant.

    The implementation follows the paper
    ["Random Features for Large-Scale Kernel Machines"](
      https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)
    by Ali Rahimi and Ben Recht.

    Which kernel is approximated is determined by the distribution the
    parameters of the random feature map are drawn from (see the paper for
    details). Any distribution can be supplied through a Keras initializer;
    two RBF kernels are supported out of the box:

    - Gaussian: `K(x, y) == exp(- square(x - y) / (2 * square(scale)))`
    - Laplacian: `K(x, y) == exp(-abs(x - y) / scale)`

    **Note:** Unlike the paper and unlike the Scikit-Learn implementation,
    the output of this layer is NOT multiplied by the `sqrt(2 / D)`
    normalization factor.

    **Usage:** The layer is typically used to "kernelize" a linear model:
    the inputs go through this non-linear transformation and a linear model
    is trained on the transformed features. Depending on the loss of the
    linear model, the composition results in models that are equivalent (up
    to approximation) to kernel SVMs (hinge loss), kernel logistic
    regression (logistic loss), kernel linear regression (squared loss),
    etc.

    Examples:

    A kernel multinomial logistic regression model with Gaussian kernel for
    MNIST:

    ```python
    model = keras.Sequential([
      keras.Input(shape=(784,)),
      RandomFourierFeatures(
          output_dim=4096,
          scale=10.,
          kernel_initializer='gaussian'),
      layers.Dense(units=10, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy']
    )
    ```

    A quasi-SVM classifier for MNIST:

    ```python
    model = keras.Sequential([
      keras.Input(shape=(784,)),
      RandomFourierFeatures(
          output_dim=4096,
          scale=10.,
          kernel_initializer='gaussian'),
      layers.Dense(units=10),
    ])
    model.compile(
        optimizer='adam',
        loss='hinge',
        metrics=['categorical_accuracy']
    )
    ```

    To use another kernel, just replace the layer creation line with:

    ```python
    random_features_layer = RandomFourierFeatures(
        output_dim=500,
        kernel_initializer=<my_initializer>,
        scale=...,
        ...)
    ```

    Args:
      output_dim: Positive integer, the dimension of the layer's output,
        i.e., the number of random features used to approximate the kernel.
      kernel_initializer: Determines the distribution of the parameters of
        the random features map (and therefore the kernel approximated by
        the layer). Either a string identifier or a Keras `Initializer`
        instance. The only supported string identifiers are 'gaussian' and
        'laplacian' (case insensitive). The kernel matrix itself is never
        trainable.
      scale: For Gaussian and Laplacian kernels, the scaling factor of the
        approximated kernel (see the formulas above). When provided, it
        should be a positive float. If `None`, a default is chosen when the
        layer is built: `sqrt(input_dim / 2)` if the kernel initializer is
        "gaussian", and 1.0 otherwise. Both the approximation error of the
        kernel and the classification quality are sensitive to this
        parameter. If `trainable` is `True`, the scale is learned
        end-to-end during training and the provided value is only the
        initial value.
        **Note:** When the features of this layer are fed to a linear
        model, making `scale` trainable means the resulting optimization
        problem is no longer convex (even if the loss used by the linear
        model is convex).
        Defaults to `None`.
      trainable: Whether the scaling parameter of the layer should be
        trainable. Defaults to `False`.
      name: String, name to use for this layer.
    """

    def __init__(
        self,
        output_dim,
        kernel_initializer="gaussian",
        scale=None,
        trainable=False,
        name=None,
        **kwargs,
    ):
        """Validates the arguments and stores the layer configuration.

        Raises:
          ValueError: If `output_dim` is not positive, if
            `kernel_initializer` is a string other than the supported
            kernel names, or if `scale` is given and is not positive.
        """
        if output_dim <= 0:
            raise ValueError(
                "`output_dim` should be a positive integer. "
                f"Received: {output_dim}"
            )
        # Only string identifiers are checked; initializer objects are
        # accepted as they are.
        if (
            isinstance(kernel_initializer, str)
            and kernel_initializer.lower() not in _SUPPORTED_RBF_KERNEL_TYPES
        ):
            raise ValueError(
                f"Unsupported `kernel_initializer`: {kernel_initializer} "
                f"Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}"
            )
        if scale is not None:
            if scale <= 0.0:
                raise ValueError(
                    "When provided, `scale` should be a positive float. "
                    f"Received: {scale}"
                )
        super().__init__(trainable=trainable, name=name, **kwargs)
        self.output_dim = output_dim
        self.kernel_initializer = kernel_initializer
        self.scale = scale

    def build(self, input_shape):
        """Creates the projection kernel, the bias and the kernel scale.

        Args:
          input_shape: Shape of the input; it must have rank 2 and a
            defined second dimension.

        Raises:
          ValueError: If the input rank is not 2 or the second dimension
            is `None`.
        """
        shape = tf.TensorShape(input_shape)
        # TODO(pmol): Allow higher dimension inputs. Currently the input is
        # expected to have shape [batch_size, dimension].
        if shape.rank != 2:
            raise ValueError(
                "The rank of the input tensor should be 2. "
                f"Received input with rank {shape.ndims} instead. "
                f"Full input shape received: {shape}"
            )
        input_dim = shape.dims[1].value
        if input_dim is None:
            raise ValueError(
                "The last dimension of the input tensor should be defined. "
                f"Found `None`. Full input shape received: {shape}"
            )
        self.input_spec = input_spec.InputSpec(ndim=2, axes={1: input_dim})

        kernel_shape = (input_dim, self.output_dim)
        # Frozen random projection matrix; the kernel scale is kept in a
        # separate weight and applied in `call`.
        self.unscaled_kernel = self.add_weight(
            name="unscaled_kernel",
            shape=kernel_shape,
            dtype=tf.float32,
            initializer=_get_random_features_initializer(
                self.kernel_initializer, shape=kernel_shape
            ),
            trainable=False,
        )

        # Frozen random phase offsets drawn uniformly from [0, 2 * pi).
        self.bias = self.add_weight(
            name="bias",
            shape=(self.output_dim,),
            dtype=tf.float32,
            initializer=initializers.RandomUniform(
                minval=0.0, maxval=2 * np.pi
            ),
            trainable=False,
        )

        # The default scale depends on the input dimension, so it can only
        # be resolved here. Note that this overwrites `self.scale`, which
        # `get_config` reports afterwards.
        if self.scale is None:
            self.scale = _get_default_scale(self.kernel_initializer, input_dim)
        # NOTE(review): this weight is always created with trainable=True;
        # presumably the layer-level `trainable` flag decides whether it is
        # actually updated -- confirm against the base `Layer`.
        self.kernel_scale = self.add_weight(
            name="kernel_scale",
            shape=(1,),
            dtype=tf.float32,
            initializer=tf.compat.v1.constant_initializer(self.scale),
            trainable=True,
            constraint="NonNeg",
        )
        super().build(shape)

    def call(self, inputs):
        """Returns `cos(inputs @ (unscaled_kernel / kernel_scale) + bias)`."""
        features = tf.cast(
            tf.convert_to_tensor(inputs, dtype=self.dtype), tf.float32
        )
        scaled_kernel = (1.0 / self.kernel_scale) * self.unscaled_kernel
        projection = tf.matmul(a=features, b=scaled_kernel)
        return tf.cos(tf.nn.bias_add(projection, self.bias))

    def compute_output_shape(self, input_shape):
        """Returns the input shape with its last axis set to `output_dim`.

        Raises:
          ValueError: If the last dimension of `input_shape` is `None`.
        """
        shape = tf.TensorShape(input_shape).with_rank(2)
        if shape.dims[-1].value is None:
            raise ValueError(
                "The last dimension of the input tensor should be defined. "
                f"Found `None`. Full input shape received: {shape}"
            )
        return shape[:-1].concatenate(self.output_dim)

    def get_config(self):
        """Returns the base layer config extended with this layer's args."""
        # String identifiers are stored as given; initializer objects are
        # serialized.
        if isinstance(self.kernel_initializer, str):
            initializer_config = self.kernel_initializer
        else:
            initializer_config = initializers.serialize(
                self.kernel_initializer
            )
        layer_config = {
            "output_dim": self.output_dim,
            "kernel_initializer": initializer_config,
            "scale": self.scale,
        }
        # Entries of this layer take precedence over the base config.
        return {**super().get_config(), **layer_config}

257

258

259def _get_random_features_initializer(initializer, shape):

260 """Returns Initializer object for random features."""

261

262 def _get_cauchy_samples(loc, scale, shape):

263 probs = np.random.uniform(low=0.0, high=1.0, size=shape)

264 return loc + scale * np.tan(np.pi * (probs - 0.5))

265

266 random_features_initializer = initializer

267 if isinstance(initializer, str):

268 if initializer.lower() == "gaussian":

269 random_features_initializer = initializers.RandomNormal(stddev=1.0)

270 elif initializer.lower() == "laplacian":

271 random_features_initializer = initializers.Constant(

272 _get_cauchy_samples(loc=0.0, scale=1.0, shape=shape)

273 )

274

275 else:

276 raise ValueError(

277 f'Unsupported `kernel_initializer`: "{initializer}" '

278 f"Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}"

279 )

280 return random_features_initializer

281

282

283def _get_default_scale(initializer, input_dim):

284 if isinstance(initializer, str) and initializer.lower() == "gaussian":

285 return np.sqrt(input_dim / 2.0)

286 return 1.0

287