# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

16"""MobileNet v1 models for Keras. 

17 

18MobileNet is a general architecture and can be used for multiple use cases. 

19Depending on the use case, it can use different input layer size and 

20different width factors. This allows different width models to reduce 

21the number of multiply-adds and thereby 

22reduce inference cost on mobile devices. 

23 

24MobileNets support any input size greater than 32 x 32, with larger image sizes 

25offering better performance. 

26The number of parameters and number of multiply-adds 

27can be modified by using the `alpha` parameter, 

28which increases/decreases the number of filters in each layer. 

29By altering the image size and `alpha` parameter, 

30all 16 models from the paper can be built, with ImageNet weights provided. 

31 

32The paper demonstrates the performance of MobileNets using `alpha` values of 

331.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. 

34For each of these `alpha` values, weights for 4 different input image sizes 

35are provided (224, 192, 160, 128). 

36 

37The following table describes the size and accuracy of the 100% MobileNet 

38on size 224 x 224: 

39---------------------------------------------------------------------------- 

40Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) 

41-------------------------|---------------|-------------------|-------------- 

42| 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | 

43| 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 | 

44| 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 | 

45| 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 | 

46 

47The following table describes the performance of 

48the 100 % MobileNet on various input sizes: 

49------------------------------------------------------------------------ 

50Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) 

51----------------------|---------------|-------------------|---------------- 

52| 1.0 MobileNet-224 | 70.6 % | 569 | 4.2 | 

53| 1.0 MobileNet-192 | 69.1 % | 418 | 4.2 | 

54| 1.0 MobileNet-160 | 67.2 % | 290 | 4.2 | 

55| 1.0 MobileNet-128 | 64.4 % | 186 | 4.2 | 

56 

57Reference: 

58 - [MobileNets: Efficient Convolutional Neural Networks 

59 for Mobile Vision Applications]( 

60 https://arxiv.org/abs/1704.04861) 

61""" 

import tensorflow.compat.v2 as tf

from keras.src import backend
from keras.src.applications import imagenet_utils
from keras.src.engine import training
from keras.src.layers import VersionAwareLayers
from keras.src.utils import data_utils
from keras.src.utils import layer_utils

# isort: off
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHT_PATH = (
    "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/"
)
layers = None


@keras_export(
    "keras.applications.mobilenet.MobileNet", "keras.applications.MobileNet"
)
def MobileNet(
    input_shape=None,
    alpha=1.0,
    depth_multiplier=1,
    dropout=1e-3,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    **kwargs,
):
    """Instantiates the MobileNet architecture.

    Reference:
    - [MobileNets: Efficient Convolutional Neural Networks
       for Mobile Vision Applications](
       https://arxiv.org/abs/1704.04861)

    This function returns a Keras image classification model,
    optionally loaded with weights pre-trained on ImageNet.

    For image classification use cases, see
    [this page for detailed examples](
    https://keras.io/api/applications/#usage-examples-for-image-classification-models).

    For transfer learning use cases, make sure to read the
    [guide to transfer learning & fine-tuning](
    https://keras.io/guides/transfer_learning/).

    Note: each Keras Application expects a specific kind of input
    preprocessing. For MobileNet, call
    `tf.keras.applications.mobilenet.preprocess_input` on your inputs before
    passing them to the model. `mobilenet.preprocess_input` will scale input
    pixels between -1 and 1.
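
    A minimal usage sketch (illustrative; `weights=None` avoids the ImageNet
    weight download, and 224 x 224 is the default input size):

    ```python
    import numpy as np
    import tensorflow as tf

    model = tf.keras.applications.MobileNet(weights=None)
    images = np.random.uniform(0, 255, size=(1, 224, 224, 3))
    inputs = tf.keras.applications.mobilenet.preprocess_input(images)
    preds = model.predict(inputs)  # shape: (1, 1000)
    ```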

    Args:
      input_shape: Optional shape tuple, only to be specified if `include_top`
        is `False` (otherwise the input shape has to be `(224, 224, 3)` (with
        `channels_last` data format) or `(3, 224, 224)` (with `channels_first`
        data format)). It should have exactly 3 input channels, and width and
        height should be no smaller than 32. E.g. `(200, 200, 3)` would be
        one valid value. Defaults to `None`.
        `input_shape` will be ignored if the `input_tensor` is provided.
      alpha: Controls the width of the network. This is known as the width
        multiplier in the MobileNet paper.
        - If `alpha` < 1.0, proportionally decreases the number of filters
          in each layer.
        - If `alpha` > 1.0, proportionally increases the number of filters
          in each layer.
        - If `alpha` = 1, the default number of filters from the paper is
          used at each layer.
        Defaults to `1.0`.
      depth_multiplier: Depth multiplier for depthwise convolution, i.e. the
        number of depthwise output channels generated per input channel.
        (Note: this is distinct from the resolution multiplier in the
        MobileNet paper, which scales the input image size.) Defaults to `1`.
      dropout: Dropout rate. Defaults to `0.001`.
      include_top: Boolean, whether to include the fully-connected layer at
        the top of the network. Defaults to `True`.
      weights: One of `None` (random initialization), `"imagenet"`
        (pre-training on ImageNet), or the path to the weights file to be
        loaded. Defaults to `"imagenet"`.
      input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`)
        to use as image input for the model. `input_tensor` is useful for
        sharing inputs between multiple different networks. Defaults to
        `None`.
      pooling: Optional pooling mode for feature extraction when
        `include_top` is `False`.
        - `None` (default) means that the output of the model will be
          the 4D tensor output of the last convolutional block.
        - `avg` means that global average pooling will be applied to the
          output of the last convolutional block, and thus the output of
          the model will be a 2D tensor.
        - `max` means that global max pooling will be applied.
      classes: Optional number of classes to classify images into, only to
        be specified if `include_top` is `True` and no `weights` argument is
        specified. Defaults to `1000`.
      classifier_activation: A `str` or callable. The activation function to
        use on the "top" layer. Ignored unless `include_top=True`. Set
        `classifier_activation=None` to return the logits of the "top"
        layer. When loading pretrained weights, `classifier_activation` can
        only be `None` or `"softmax"`.
      **kwargs: For backwards compatibility only.

    Returns:
      A `keras.Model` instance.
    """

    global layers
    if "layers" in kwargs:
        layers = kwargs.pop("layers")
    else:
        layers = VersionAwareLayers()
    if kwargs:
        raise ValueError(f"Unknown argument(s): {(kwargs,)}")
    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded. "
            f"Received weights={weights}"
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            "as true, `classes` should be 1000. "
            f"Received classes={classes}"
        )

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if backend.image_data_format() == "channels_first":
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if backend.image_data_format() == "channels_last":
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == "imagenet":
        if depth_multiplier != 1:
            raise ValueError(
                "If imagenet weights are being loaded, "
                "depth multiplier must be 1. "
                f"Received depth_multiplier={depth_multiplier}"
            )

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError(
                "If imagenet weights are being loaded, "
                "alpha can be one of "
                "`0.25`, `0.50`, `0.75` or `1.0` only. "
                f"Received alpha={alpha}"
            )

        if rows != cols or rows not in [128, 160, 192, 224]:
            rows = 224
            logging.warning(
                "`input_shape` is undefined or non-square, "
                "or `rows` is not in [128, 160, 192, 224]. "
                "Weights for input shape (224, 224) will be "
                "loaded as the default."
            )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
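
    # The stride-2 blocks below progressively halve the spatial resolution
    # while the pointwise filter count grows (128 -> 256 -> 512 -> 1024),
    # following Table 1 of the MobileNet paper.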

    x = _depthwise_conv_block(
        x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2
    )
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

    x = _depthwise_conv_block(
        x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4
    )
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

    x = _depthwise_conv_block(
        x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6
    )
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

    x = _depthwise_conv_block(
        x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12
    )
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)
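    # With a 224x224 input, the five stride-2 stages so far (the stem conv
    # plus blocks 2, 4, 6 and 12) leave `x` as a 7x7 feature map with
    # `int(1024 * alpha)` channels.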

    if include_top:
        x = layers.GlobalAveragePooling2D(keepdims=True)(x)
        x = layers.Dropout(dropout, name="dropout")(x)
        x = layers.Conv2D(classes, (1, 1), padding="same", name="conv_preds")(
            x
        )
        x = layers.Reshape((classes,), name="reshape_2")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Activation(
            activation=classifier_activation, name="predictions"
        )(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=f"mobilenet_{alpha:0.2f}_{rows}")

    # Load weights.
    if weights == "imagenet":
        if alpha == 1.0:
            alpha_text = "1_0"
        elif alpha == 0.75:
            alpha_text = "7_5"
        elif alpha == 0.50:
            alpha_text = "5_0"
        else:
            alpha_text = "2_5"

        if include_top:
            model_name = "mobilenet_%s_%d_tf.h5" % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = data_utils.get_file(
                model_name, weight_path, cache_subdir="models"
            )
        else:
            model_name = "mobilenet_%s_%d_tf_no_top.h5" % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = data_utils.get_file(
                model_name, weight_path, cache_subdir="models"
            )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Adds an initial convolution layer (with batch normalization and relu6).

    Args:
      inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last`
        data format) or `(3, rows, cols)` (with `channels_first` data
        format). It should have exactly 3 input channels, and width and
        height should be no smaller than 32. E.g. `(224, 224, 3)` would be
        one valid value.
      filters: Integer, the dimensionality of the output space (i.e. the
        number of output filters in the convolution).
      alpha: Controls the width of the network.
        - If `alpha` < 1.0, proportionally decreases the number of filters
          in each layer.
        - If `alpha` > 1.0, proportionally increases the number of filters
          in each layer.
        - If `alpha` = 1, the default number of filters from the paper is
          used at each layer.
        For example, with `filters=32` and `alpha=0.5` the block produces
        `int(32 * 0.5) = 16` output channels.
      kernel: An integer or tuple/list of 2 integers, specifying the width
        and height of the 2D convolution window. Can be a single integer to
        specify the same value for all spatial dimensions.
      strides: An integer or tuple/list of 2 integers, specifying the strides
        of the convolution along the width and height. Can be a single
        integer to specify the same value for all spatial dimensions.
        Specifying any stride value != 1 is incompatible with specifying any
        `dilation_rate` value != 1.

    Input shape:
      4D tensor with shape `(samples, channels, rows, cols)` if
      `data_format='channels_first'`, or 4D tensor with shape
      `(samples, rows, cols, channels)` if `data_format='channels_last'`.

    Output shape:
      4D tensor with shape `(samples, filters, new_rows, new_cols)` if
      `data_format='channels_first'`, or 4D tensor with shape
      `(samples, new_rows, new_cols, filters)` if
      `data_format='channels_last'`. `rows` and `cols` values might have
      changed due to stride.

    Returns:
      Output tensor of block.
    """

    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
    filters = int(filters * alpha)
    x = layers.Conv2D(
        filters,
        kernel,
        padding="same",
        use_bias=False,
        strides=strides,
        name="conv1",
    )(inputs)
    x = layers.BatchNormalization(axis=channel_axis, name="conv1_bn")(x)
    return layers.ReLU(6.0, name="conv1_relu")(x)


def _depthwise_conv_block(
    inputs,
    pointwise_conv_filters,
    alpha,
    depth_multiplier=1,
    strides=(1, 1),
    block_id=1,
):
    """Adds a depthwise convolution block.

    A depthwise convolution block consists of a depthwise conv,
    batch normalization, relu6, pointwise convolution,
    batch normalization and relu6 activation.

    Args:
      inputs: Input tensor of shape `(rows, cols, channels)` (with
        `channels_last` data format) or `(channels, rows, cols)` (with
        `channels_first` data format).
      pointwise_conv_filters: Integer, the dimensionality of the output space
        (i.e. the number of output filters in the pointwise convolution).
      alpha: Controls the width of the network.
        - If `alpha` < 1.0, proportionally decreases the number of filters
          in each layer.
        - If `alpha` > 1.0, proportionally increases the number of filters
          in each layer.
        - If `alpha` = 1, the default number of filters from the paper is
          used at each layer.
      depth_multiplier: The number of depthwise convolution output channels
        for each input channel. The total number of depthwise convolution
        output channels will be equal to `filters_in * depth_multiplier`.
        For instance, 64 input channels with `depth_multiplier=2` yield 128
        depthwise output channels before the pointwise convolution.
      strides: An integer or tuple/list of 2 integers, specifying the strides
        of the convolution along the width and height. Can be a single
        integer to specify the same value for all spatial dimensions.
        Specifying any stride value != 1 is incompatible with specifying any
        `dilation_rate` value != 1.
      block_id: Integer, a unique identifier designating the block number.

    Input shape:
      4D tensor with shape `(batch, channels, rows, cols)` if
      `data_format='channels_first'`, or 4D tensor with shape
      `(batch, rows, cols, channels)` if `data_format='channels_last'`.

    Output shape:
      4D tensor with shape `(batch, filters, new_rows, new_cols)` if
      `data_format='channels_first'`, or 4D tensor with shape
      `(batch, new_rows, new_cols, filters)` if
      `data_format='channels_last'`. `rows` and `cols` values might have
      changed due to stride.

    Returns:
      Output tensor of block.
    """

    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    if strides == (1, 1):
        x = inputs
    else:
        x = layers.ZeroPadding2D(
            ((0, 1), (0, 1)), name="conv_pad_%d" % block_id
        )(inputs)
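        # The asymmetric ((0, 1), (0, 1)) zero-padding above, combined with
        # the "valid" depthwise convolution below, reproduces TensorFlow's
        # "same" padding behavior for stride 2 explicitly and
        # deterministically.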

    x = layers.DepthwiseConv2D(
        (3, 3),
        padding="same" if strides == (1, 1) else "valid",
        depth_multiplier=depth_multiplier,
        strides=strides,
        use_bias=False,
        name="conv_dw_%d" % block_id,
    )(x)
    x = layers.BatchNormalization(
        axis=channel_axis, name="conv_dw_%d_bn" % block_id
    )(x)
    x = layers.ReLU(6.0, name="conv_dw_%d_relu" % block_id)(x)

    x = layers.Conv2D(
        pointwise_conv_filters,
        (1, 1),
        padding="same",
        use_bias=False,
        strides=(1, 1),
        name="conv_pw_%d" % block_id,
    )(x)
    x = layers.BatchNormalization(
        axis=channel_axis, name="conv_pw_%d_bn" % block_id
    )(x)
    return layers.ReLU(6.0, name="conv_pw_%d_relu" % block_id)(x)


@keras_export("keras.applications.mobilenet.preprocess_input")
def preprocess_input(x, data_format=None):
    return imagenet_utils.preprocess_input(
        x, data_format=data_format, mode="tf"
    )
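# Note: `mode="tf"` scales input pixels from the [0, 255] range to [-1, 1]
# via `x = x / 127.5 - 1.0` (see `imagenet_utils.preprocess_input`).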


@keras_export("keras.applications.mobilenet.decode_predictions")
def decode_predictions(preds, top=5):
    return imagenet_utils.decode_predictions(preds, top=top)


preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode="",
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__
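

# A minimal smoke-test sketch (hypothetical usage, not part of the Keras
# API). `weights=None` keeps it offline; assumes a `channels_last` backend.
if __name__ == "__main__":
    import numpy as np

    model = MobileNet(weights=None, input_shape=(128, 128, 3))
    images = np.random.uniform(0, 255, size=(1, 128, 128, 3))
    preds = model.predict(preprocess_input(images))
    print(preds.shape)  # expected: (1, 1000)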