# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

16"""MobileNet v1 models for Keras. 

17 

18MobileNet is a general architecture and can be used for multiple use cases. 

19Depending on the use case, it can use different input layer size and 

20different width factors. This allows different width models to reduce 

21the number of multiply-adds and thereby 

22reduce inference cost on mobile devices. 

23 

24MobileNets support any input size greater than 32 x 32, with larger image sizes 

25offering better performance. 

26The number of parameters and number of multiply-adds 

27can be modified by using the `alpha` parameter, 

28which increases/decreases the number of filters in each layer. 

29By altering the image size and `alpha` parameter, 

30all 16 models from the paper can be built, with ImageNet weights provided. 

31 

32The paper demonstrates the performance of MobileNets using `alpha` values of 

331.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. 

34For each of these `alpha` values, weights for 4 different input image sizes 

35are provided (224, 192, 160, 128). 

36 

37The following table describes the size and accuracy of the 100% MobileNet 

38on size 224 x 224: 

39---------------------------------------------------------------------------- 

40Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) 

41-------------------------|---------------|-------------------|-------------- 

42| 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | 

43| 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 | 

44| 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 | 

45| 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 | 

46 

47The following table describes the performance of 

48the 100 % MobileNet on various input sizes: 

49------------------------------------------------------------------------ 

50Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) 

51----------------------|---------------|-------------------|---------------- 

52| 1.0 MobileNet-224 | 70.6 % | 569 | 4.2 | 

53| 1.0 MobileNet-192 | 69.1 % | 418 | 4.2 | 

54| 1.0 MobileNet-160 | 67.2 % | 290 | 4.2 | 

55| 1.0 MobileNet-128 | 64.4 % | 186 | 4.2 | 

56 

57Reference: 

58 - [MobileNets: Efficient Convolutional Neural Networks 

59 for Mobile Vision Applications]( 

60 https://arxiv.org/abs/1704.04861) 

61""" 

import tensorflow.compat.v2 as tf

from keras.src import backend
from keras.src.applications import imagenet_utils
from keras.src.engine import training
from keras.src.layers import VersionAwareLayers
from keras.src.utils import data_utils
from keras.src.utils import layer_utils

# isort: off
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHT_PATH = (
    "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/"
)
layers = None


@keras_export(
    "keras.applications.mobilenet.MobileNet", "keras.applications.MobileNet"
)
def MobileNet(
    input_shape=None,
    alpha=1.0,
    depth_multiplier=1,
    dropout=1e-3,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    **kwargs,
):
    """Instantiates the MobileNet architecture.

    Reference:
    - [MobileNets: Efficient Convolutional Neural Networks
       for Mobile Vision Applications](
       https://arxiv.org/abs/1704.04861)

    This function returns a Keras image classification model,
    optionally loaded with weights pre-trained on ImageNet.

    For image classification use cases, see
    [this page for detailed examples](
    https://keras.io/api/applications/#usage-examples-for-image-classification-models).

    For transfer learning use cases, make sure to read the
    [guide to transfer learning & fine-tuning](
    https://keras.io/guides/transfer_learning/).

    Note: each Keras Application expects a specific kind of input
    preprocessing. For MobileNet, call
    `tf.keras.applications.mobilenet.preprocess_input` on your inputs before
    passing them to the model. `mobilenet.preprocess_input` will scale input
    pixels between -1 and 1.
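
    A minimal usage sketch (illustrative; `weights=None` avoids the ImageNet
    weight download, and 224 x 224 is the default input size):

    ```python
    import numpy as np
    import tensorflow as tf

    model = tf.keras.applications.MobileNet(weights=None)
    images = np.random.uniform(0, 255, size=(1, 224, 224, 3))
    inputs = tf.keras.applications.mobilenet.preprocess_input(images)
    preds = model.predict(inputs)  # shape: (1, 1000)
    ```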

    Args:
      input_shape: Optional shape tuple, only to be specified if `include_top`
        is `False` (otherwise the input shape has to be `(224, 224, 3)` (with
        `channels_last` data format) or `(3, 224, 224)` (with `channels_first`
        data format)). It should have exactly 3 input channels, and width and
        height should be no smaller than 32. E.g. `(200, 200, 3)` would be
        one valid value. Defaults to `None`.
        `input_shape` will be ignored if the `input_tensor` is provided.
      alpha: Controls the width of the network. This is known as the width
        multiplier in the MobileNet paper.
        - If `alpha` < 1.0, proportionally decreases the number of filters
          in each layer.
        - If `alpha` > 1.0, proportionally increases the number of filters
          in each layer.
        - If `alpha` = 1, the default number of filters from the paper is
          used at each layer.
        Defaults to `1.0`.
      depth_multiplier: Depth multiplier for depthwise convolution, i.e. the
        number of depthwise output channels generated per input channel.
        (Note: this is distinct from the resolution multiplier in the
        MobileNet paper, which scales the input image size.) Defaults to `1`.
      dropout: Dropout rate. Defaults to `0.001`.
      include_top: Boolean, whether to include the fully-connected layer at
        the top of the network. Defaults to `True`.
      weights: One of `None` (random initialization), `"imagenet"`
        (pre-training on ImageNet), or the path to the weights file to be
        loaded. Defaults to `"imagenet"`.
      input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`)
        to use as image input for the model. `input_tensor` is useful for
        sharing inputs between multiple different networks. Defaults to
        `None`.
      pooling: Optional pooling mode for feature extraction when
        `include_top` is `False`.
        - `None` (default) means that the output of the model will be
          the 4D tensor output of the last convolutional block.
        - `avg` means that global average pooling will be applied to the
          output of the last convolutional block, and thus the output of
          the model will be a 2D tensor.
        - `max` means that global max pooling will be applied.
      classes: Optional number of classes to classify images into, only to
        be specified if `include_top` is `True` and no `weights` argument is
        specified. Defaults to `1000`.
      classifier_activation: A `str` or callable. The activation function to
        use on the "top" layer. Ignored unless `include_top=True`. Set
        `classifier_activation=None` to return the logits of the "top"
        layer. When loading pretrained weights, `classifier_activation` can
        only be `None` or `"softmax"`.
      **kwargs: For backwards compatibility only.

    Returns:
      A `keras.Model` instance.
    """

    global layers
    if "layers" in kwargs:
        layers = kwargs.pop("layers")
    else:
        layers = VersionAwareLayers()
    if kwargs:
        raise ValueError(f"Unknown argument(s): {(kwargs,)}")
    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded. "
            f"Received weights={weights}"
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            "as true, `classes` should be 1000. "
            f"Received classes={classes}"
        )

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if backend.image_data_format() == "channels_first":
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if backend.image_data_format() == "channels_last":
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == "imagenet":
        if depth_multiplier != 1:
            raise ValueError(
                "If imagenet weights are being loaded, "
                "depth multiplier must be 1. "
                f"Received depth_multiplier={depth_multiplier}"
            )

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError(
                "If imagenet weights are being loaded, "
                "alpha can be one of "
                "`0.25`, `0.50`, `0.75` or `1.0` only. "
                f"Received alpha={alpha}"
            )

        if rows != cols or rows not in [128, 160, 192, 224]:
            rows = 224
            logging.warning(
                "`input_shape` is undefined or non-square, "
                "or `rows` is not in [128, 160, 192, 224]. "
                "Weights for input shape (224, 224) will be "
                "loaded as the default."
            )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
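
    # The stride-2 blocks below progressively halve the spatial resolution
    # while the pointwise filter count grows (128 -> 256 -> 512 -> 1024),
    # following Table 1 of the MobileNet paper.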

    x = _depthwise_conv_block(
        x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2
    )
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

    x = _depthwise_conv_block(
        x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4
    )
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

    x = _depthwise_conv_block(
        x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6
    )
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

    x = _depthwise_conv_block(
        x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12
    )
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)
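    # With a 224x224 input, the five stride-2 stages so far (the stem conv
    # plus blocks 2, 4, 6 and 12) leave `x` as a 7x7 feature map with
    # `int(1024 * alpha)` channels.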

    if include_top:
        x = layers.GlobalAveragePooling2D(keepdims=True)(x)
        x = layers.Dropout(dropout, name="dropout")(x)
        x = layers.Conv2D(classes, (1, 1), padding="same", name="conv_preds")(
            x
        )
        x = layers.Reshape((classes,), name="reshape_2")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Activation(
            activation=classifier_activation, name="predictions"
        )(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=f"mobilenet_{alpha:0.2f}_{rows}")

    # Load weights.
    if weights == "imagenet":
        if alpha == 1.0:
            alpha_text = "1_0"
        elif alpha == 0.75:
            alpha_text = "7_5"
        elif alpha == 0.50:
            alpha_text = "5_0"
        else:
            alpha_text = "2_5"

        if include_top:
            model_name = "mobilenet_%s_%d_tf.h5" % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = data_utils.get_file(
                model_name, weight_path, cache_subdir="models"
            )
        else:
            model_name = "mobilenet_%s_%d_tf_no_top.h5" % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = data_utils.get_file(
                model_name, weight_path, cache_subdir="models"
            )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Adds an initial convolution layer (with batch normalization and relu6).

    Args:
      inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last`
        data format) or `(3, rows, cols)` (with `channels_first` data
        format). It should have exactly 3 input channels, and width and
        height should be no smaller than 32. E.g. `(224, 224, 3)` would be
        one valid value.
      filters: Integer, the dimensionality of the output space (i.e. the
        number of output filters in the convolution).
      alpha: Controls the width of the network.
        - If `alpha` < 1.0, proportionally decreases the number of filters
          in each layer.
        - If `alpha` > 1.0, proportionally increases the number of filters
          in each layer.
        - If `alpha` = 1, the default number of filters from the paper is
          used at each layer.
        For example, with `filters=32` and `alpha=0.5` the block produces
        `int(32 * 0.5) = 16` output channels.
      kernel: An integer or tuple/list of 2 integers, specifying the width
        and height of the 2D convolution window. Can be a single integer to
        specify the same value for all spatial dimensions.
      strides: An integer or tuple/list of 2 integers, specifying the strides
        of the convolution along the width and height. Can be a single
        integer to specify the same value for all spatial dimensions.
        Specifying any stride value != 1 is incompatible with specifying any
        `dilation_rate` value != 1.

    Input shape:
      4D tensor with shape `(samples, channels, rows, cols)` if
      `data_format='channels_first'`, or 4D tensor with shape
      `(samples, rows, cols, channels)` if `data_format='channels_last'`.

    Output shape:
      4D tensor with shape `(samples, filters, new_rows, new_cols)` if
      `data_format='channels_first'`, or 4D tensor with shape
      `(samples, new_rows, new_cols, filters)` if
      `data_format='channels_last'`. `rows` and `cols` values might have
      changed due to stride.

    Returns:
      Output tensor of block.
    """

    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
    filters = int(filters * alpha)
    x = layers.Conv2D(
        filters,
        kernel,
        padding="same",
        use_bias=False,
        strides=strides,
        name="conv1",
    )(inputs)
    x = layers.BatchNormalization(axis=channel_axis, name="conv1_bn")(x)
    return layers.ReLU(6.0, name="conv1_relu")(x)


def _depthwise_conv_block(
    inputs,
    pointwise_conv_filters,
    alpha,
    depth_multiplier=1,
    strides=(1, 1),
    block_id=1,
):
    """Adds a depthwise convolution block.

    A depthwise convolution block consists of a depthwise conv,
    batch normalization, relu6, pointwise convolution,
    batch normalization and relu6 activation.

    Args:
      inputs: Input tensor of shape `(rows, cols, channels)` (with
        `channels_last` data format) or `(channels, rows, cols)` (with
        `channels_first` data format).
      pointwise_conv_filters: Integer, the dimensionality of the output space
        (i.e. the number of output filters in the pointwise convolution).
      alpha: Controls the width of the network.
        - If `alpha` < 1.0, proportionally decreases the number of filters
          in each layer.
        - If `alpha` > 1.0, proportionally increases the number of filters
          in each layer.
        - If `alpha` = 1, the default number of filters from the paper is
          used at each layer.
      depth_multiplier: The number of depthwise convolution output channels
        for each input channel. The total number of depthwise convolution
        output channels will be equal to `filters_in * depth_multiplier`.
        For instance, 64 input channels with `depth_multiplier=2` yield 128
        depthwise output channels before the pointwise convolution.
      strides: An integer or tuple/list of 2 integers, specifying the strides
        of the convolution along the width and height. Can be a single
        integer to specify the same value for all spatial dimensions.
        Specifying any stride value != 1 is incompatible with specifying any
        `dilation_rate` value != 1.
      block_id: Integer, a unique identifier designating the block number.

    Input shape:
      4D tensor with shape `(batch, channels, rows, cols)` if
      `data_format='channels_first'`, or 4D tensor with shape
      `(batch, rows, cols, channels)` if `data_format='channels_last'`.

    Output shape:
      4D tensor with shape `(batch, filters, new_rows, new_cols)` if
      `data_format='channels_first'`, or 4D tensor with shape
      `(batch, new_rows, new_cols, filters)` if
      `data_format='channels_last'`. `rows` and `cols` values might have
      changed due to stride.

    Returns:
      Output tensor of block.
    """

    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    if strides == (1, 1):
        x = inputs
    else:
        x = layers.ZeroPadding2D(
            ((0, 1), (0, 1)), name="conv_pad_%d" % block_id
        )(inputs)
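        # The asymmetric ((0, 1), (0, 1)) zero-padding above, combined with
        # the "valid" depthwise convolution below, reproduces TensorFlow's
        # "same" padding behavior for stride 2 explicitly and
        # deterministically.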

    x = layers.DepthwiseConv2D(
        (3, 3),
        padding="same" if strides == (1, 1) else "valid",
        depth_multiplier=depth_multiplier,
        strides=strides,
        use_bias=False,
        name="conv_dw_%d" % block_id,
    )(x)
    x = layers.BatchNormalization(
        axis=channel_axis, name="conv_dw_%d_bn" % block_id
    )(x)
    x = layers.ReLU(6.0, name="conv_dw_%d_relu" % block_id)(x)

    x = layers.Conv2D(
        pointwise_conv_filters,
        (1, 1),
        padding="same",
        use_bias=False,
        strides=(1, 1),
        name="conv_pw_%d" % block_id,
    )(x)
    x = layers.BatchNormalization(
        axis=channel_axis, name="conv_pw_%d_bn" % block_id
    )(x)
    return layers.ReLU(6.0, name="conv_pw_%d_relu" % block_id)(x)


@keras_export("keras.applications.mobilenet.preprocess_input")
def preprocess_input(x, data_format=None):
    return imagenet_utils.preprocess_input(
        x, data_format=data_format, mode="tf"
    )
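# Note: `mode="tf"` scales input pixels from the [0, 255] range to [-1, 1]
# via `x = x / 127.5 - 1.0` (see `imagenet_utils.preprocess_input`).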


@keras_export("keras.applications.mobilenet.decode_predictions")
def decode_predictions(preds, top=5):
    return imagenet_utils.decode_predictions(preds, top=top)


preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode="",
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__
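

# A minimal smoke-test sketch (hypothetical usage, not part of the Keras
# API). `weights=None` keeps it offline; assumes a `channels_last` backend.
if __name__ == "__main__":
    import numpy as np

    model = MobileNet(weights=None, input_shape=(128, 128, 3))
    images = np.random.uniform(0, 255, size=(1, 128, 128, 3))
    preds = model.predict(preprocess_input(images))
    print(preds.shape)  # expected: (1, 1000)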