# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""MobileNet v2 models for Keras.

MobileNetV2 is a general architecture and can be used for multiple use cases.
Depending on the use case, it can use different input layer sizes and
different width factors. This allows different width models to reduce
the number of multiply-adds and thereby
reduce inference cost on mobile devices.

MobileNetV2 is very similar to the original MobileNet,
except that it uses inverted residual blocks with
bottlenecking features. It has a drastically lower
parameter count than the original MobileNet.
MobileNets support any input size greater
than 32 x 32, with larger image sizes
offering better performance.

The number of parameters and number of multiply-adds
can be modified by using the `alpha` parameter,
which increases/decreases the number of filters in each layer.
By altering the image size and `alpha` parameter,
all 22 models from the paper can be built, with ImageNet weights provided.

The paper demonstrates the performance of MobileNets using `alpha` values of
0.35, 0.5, 0.75, 1.0 (also called 100% MobileNet), 1.3, and 1.4.
For each of these `alpha` values, weights for 5 different input image sizes
are provided (224, 192, 160, 128, and 96).
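
For example, to build the smallest variant in the table below (0.35 width,
96x96 input), a minimal sketch using this module's constructor:

```python
model = MobileNetV2(input_shape=(96, 96, 3), alpha=0.35, weights="imagenet")
```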

The following table describes the performance of
MobileNet on various input sizes (MACs stands for Multiply-Adds):

| Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Accuracy | Top 5 Accuracy |
|---------------------------|----------|----------------|----------------|----------------|
| [mobilenet_v2_1.4_224]    | 582      | 6.06           | 75.0           | 92.5           |
| [mobilenet_v2_1.3_224]    | 509      | 5.34           | 74.4           | 92.1           |
| [mobilenet_v2_1.0_224]    | 300      | 3.47           | 71.8           | 91.0           |
| [mobilenet_v2_1.0_192]    | 221      | 3.47           | 70.7           | 90.1           |
| [mobilenet_v2_1.0_160]    | 154      | 3.47           | 68.8           | 89.0           |
| [mobilenet_v2_1.0_128]    | 99       | 3.47           | 65.3           | 86.9           |
| [mobilenet_v2_1.0_96]     | 56       | 3.47           | 60.3           | 83.2           |
| [mobilenet_v2_0.75_224]   | 209      | 2.61           | 69.8           | 89.6           |
| [mobilenet_v2_0.75_192]   | 153      | 2.61           | 68.7           | 88.9           |
| [mobilenet_v2_0.75_160]   | 107      | 2.61           | 66.4           | 87.3           |
| [mobilenet_v2_0.75_128]   | 69       | 2.61           | 63.2           | 85.3           |
| [mobilenet_v2_0.75_96]    | 39       | 2.61           | 58.8           | 81.6           |
| [mobilenet_v2_0.5_224]    | 97       | 1.95           | 65.4           | 86.4           |
| [mobilenet_v2_0.5_192]    | 71       | 1.95           | 63.9           | 85.4           |
| [mobilenet_v2_0.5_160]    | 50       | 1.95           | 61.0           | 83.2           |
| [mobilenet_v2_0.5_128]    | 32       | 1.95           | 57.7           | 80.8           |
| [mobilenet_v2_0.5_96]     | 18       | 1.95           | 51.2           | 75.8           |
| [mobilenet_v2_0.35_224]   | 59       | 1.66           | 60.3           | 82.9           |
| [mobilenet_v2_0.35_192]   | 43       | 1.66           | 58.2           | 81.2           |
| [mobilenet_v2_0.35_160]   | 30       | 1.66           | 55.7           | 79.1           |
| [mobilenet_v2_0.35_128]   | 20       | 1.66           | 50.8           | 75.0           |
| [mobilenet_v2_0.35_96]    | 11       | 1.66           | 45.5           | 70.4           |

Reference:
- [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
    https://arxiv.org/abs/1801.04381) (CVPR 2018)
"""

import tensorflow.compat.v2 as tf

from keras.src import backend
from keras.src.applications import imagenet_utils
from keras.src.engine import training
from keras.src.layers import VersionAwareLayers
from keras.src.utils import data_utils
from keras.src.utils import layer_utils

# isort: off
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHT_PATH = (
    "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/"
)
layers = None


@keras_export(
    "keras.applications.mobilenet_v2.MobileNetV2",
    "keras.applications.MobileNetV2",
)
def MobileNetV2(
    input_shape=None,
    alpha=1.0,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    **kwargs,
):
    """Instantiates the MobileNetV2 architecture.

    MobileNetV2 is very similar to the original MobileNet,
    except that it uses inverted residual blocks with
    bottlenecking features. It has a drastically lower
    parameter count than the original MobileNet.
    MobileNets support any input size greater
    than 32 x 32, with larger image sizes
    offering better performance.

    Reference:
    - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
        https://arxiv.org/abs/1801.04381) (CVPR 2018)

    This function returns a Keras image classification model,
    optionally loaded with weights pre-trained on ImageNet.

    For image classification use cases, see
    [this page for detailed examples](
    https://keras.io/api/applications/#usage-examples-for-image-classification-models).

    For transfer learning use cases, make sure to read the
    [guide to transfer learning & fine-tuning](
    https://keras.io/guides/transfer_learning/).

    Note: each Keras Application expects a specific kind of input
    preprocessing. For MobileNetV2, call
    `tf.keras.applications.mobilenet_v2.preprocess_input` on your inputs
    before passing them to the model.
    `mobilenet_v2.preprocess_input` will scale input pixels between -1 and 1.
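
    Example (a minimal sketch; the random input below is purely illustrative):

    ```python
    import numpy as np
    import tensorflow as tf

    model = tf.keras.applications.MobileNetV2(weights="imagenet")
    # One dummy RGB image with values in [0, 255] at the default 224x224 size.
    images = np.random.uniform(0, 255, size=(1, 224, 224, 3))
    images = tf.keras.applications.mobilenet_v2.preprocess_input(images)
    preds = model.predict(images)
    print(tf.keras.applications.mobilenet_v2.decode_predictions(preds, top=3))
    ```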

    Args:
        input_shape: Optional shape tuple, to be specified if you would
            like to use a model with an input image resolution that is not
            (224, 224, 3).
            It should have exactly 3 input channels.
            You can also omit this option if you would like
            to infer `input_shape` from an `input_tensor`.
            If you choose to include both `input_tensor` and `input_shape`,
            then `input_shape` will be used if they match; if the shapes
            do not match, an error will be raised.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: Float, larger than zero, controls the width of the network.
            This is known as the width multiplier in the MobileNetV2 paper,
            but the name is kept for consistency with the
            `applications.MobileNetV1` model in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1.0, the default number of filters from the paper
                is used at each layer.
        include_top: Boolean, whether to include the fully-connected layer at
            the top of the network. Defaults to `True`.
        weights: String, one of `None` (random initialization), `'imagenet'`
            (pre-training on ImageNet), or the path to the weights file to be
            loaded.
        input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        pooling: String, optional pooling mode for feature extraction when
            `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: Optional integer number of classes to classify images into,
            only to be specified if `include_top` is `True`, and if no
            `weights` argument is specified.
        classifier_activation: A `str` or callable. The activation function to
            use on the "top" layer. Ignored unless `include_top=True`. Set
            `classifier_activation=None` to return the logits of the "top"
            layer. When loading pretrained weights, `classifier_activation`
            can only be `None` or `"softmax"`.
        **kwargs: For backwards compatibility only.

    Returns:
        A `keras.Model` instance.
    """
    global layers
    if "layers" in kwargs:
        layers = kwargs.pop("layers")
    else:
        layers = VersionAwareLayers()
    if kwargs:
        raise ValueError(f"Unknown argument(s): {kwargs}")
    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded. "
            f"Received `weights={weights}`"
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            f"as true, `classes` should be 1000. Received `classes={classes}`"
        )

    # Determine the proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match.
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    layer_utils.get_source_inputs(input_tensor)
                )
            except ValueError:
                raise ValueError(
                    f"input_tensor: {input_tensor} "
                    "is not a valid input tensor. "
                    f"Received `type(input_tensor)={type(input_tensor)}`"
                )
        if is_input_t_tensor:
            if backend.image_data_format() == "channels_first":
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        "input_shape[1] must equal shape(input_tensor)[1] "
                        "when `image_data_format` is `channels_first`; "
                        "Received `input_tensor.shape="
                        f"{input_tensor.shape}`"
                        f", `input_shape={input_shape}`"
                    )
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        "input_tensor.shape[2] must equal input_shape[1]; "
                        "Received `input_tensor.shape="
                        f"{input_tensor.shape}`, "
                        f"`input_shape={input_shape}`"
                    )
        else:
            raise ValueError(
                "input_tensor is not a Keras tensor; "
                f"Received `input_tensor={input_tensor}`"
            )

    # If input_shape is None, infer shape from input_tensor.
    if input_shape is None and input_tensor is not None:
        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError(
                "input_tensor must be a valid Keras tensor type; "
                f"Received {input_tensor} of type {type(input_tensor)}"
            )

    if input_shape is None and not backend.is_keras_tensor(input_tensor):
        default_size = 224
    elif input_shape is None and backend.is_keras_tensor(input_tensor):
        if backend.image_data_format() == "channels_first":
            rows = backend.int_shape(input_tensor)[2]
            cols = backend.int_shape(input_tensor)[3]
        else:
            rows = backend.int_shape(input_tensor)[1]
            cols = backend.int_shape(input_tensor)[2]

        if rows == cols and rows in [96, 128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    # If input_shape is None and there is no input_tensor, use the default.
    elif input_shape is None:
        default_size = 224

    # If input_shape is not None, derive the default size from it.
    else:
        if backend.image_data_format() == "channels_first":
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [96, 128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if backend.image_data_format() == "channels_last":
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == "imagenet":
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError(
                "If imagenet weights are being loaded, "
                "alpha must be one of `0.35`, `0.50`, `0.75`, "
                "`1.0`, `1.3` or `1.4` only; "
                f"Received `alpha={alpha}`"
            )

        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            rows = 224
            logging.warning(
                "`input_shape` is undefined or non-square, "
                "or `rows` is not in [96, 128, 160, 192, 224]. "
                "Weights for input shape (224, 224) will be "
                "loaded as the default."
            )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.Conv2D(
        first_block_filters,
        kernel_size=3,
        strides=(2, 2),
        padding="same",
        use_bias=False,
        name="Conv1",
    )(img_input)
    x = layers.BatchNormalization(
        axis=channel_axis, epsilon=1e-3, momentum=0.999, name="bn_Conv1"
    )(x)
    x = layers.ReLU(6.0, name="Conv1_relu")(x)
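
    # The 17 inverted residual blocks below (block_id 0-16) follow the layer
    # configuration in Table 2 of the MobileNetV2 paper (expansion factor t,
    # output channels c, repeats n, stride s); `alpha` scales each block's
    # output channel count.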

    x = _inverted_res_block(
        x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0
    )

    x = _inverted_res_block(
        x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1
    )
    x = _inverted_res_block(
        x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2
    )

    x = _inverted_res_block(
        x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3
    )
    x = _inverted_res_block(
        x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4
    )
    x = _inverted_res_block(
        x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5
    )

    x = _inverted_res_block(
        x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6
    )
    x = _inverted_res_block(
        x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7
    )
    x = _inverted_res_block(
        x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8
    )
    x = _inverted_res_block(
        x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9
    )

    x = _inverted_res_block(
        x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10
    )
    x = _inverted_res_block(
        x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11
    )
    x = _inverted_res_block(
        x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12
    )

    x = _inverted_res_block(
        x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13
    )
    x = _inverted_res_block(
        x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14
    )
    x = _inverted_res_block(
        x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15
    )

    x = _inverted_res_block(
        x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16
    )

    # No alpha applied to the last conv, as stated in the paper: if the width
    # multiplier is greater than 1, we increase the number of output channels.
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = layers.Conv2D(
        last_block_filters, kernel_size=1, use_bias=False, name="Conv_1"
    )(x)
    x = layers.BatchNormalization(
        axis=channel_axis, epsilon=1e-3, momentum=0.999, name="Conv_1_bn"
    )(x)
    x = layers.ReLU(6.0, name="out_relu")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(
            classes, activation=classifier_activation, name="predictions"
        )(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account any potential predecessors of
    # `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=f"mobilenetv2_{alpha:0.2f}_{rows}")

    # Load weights. The two weight files per (alpha, rows) pair differ only
    # in the `_no_top` suffix.
    if weights == "imagenet":
        suffix = "" if include_top else "_no_top"
        model_name = (
            "mobilenet_v2_weights_tf_dim_ordering_tf_kernels_"
            f"{float(alpha)}_{rows}{suffix}.h5"
        )
        weights_path = data_utils.get_file(
            model_name, BASE_WEIGHT_PATH + model_name, cache_subdir="models"
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
    """Inverted residual block (expand -> depthwise -> project)."""
    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1

    in_channels = backend.int_shape(inputs)[channel_axis]
    pointwise_conv_filters = int(filters * alpha)
    # Ensure the number of filters on the last 1x1 convolution is divisible
    # by 8.
    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
    x = inputs
    prefix = f"block_{block_id}_"

    if block_id:
        # Expand with a pointwise 1x1 convolution.
        x = layers.Conv2D(
            expansion * in_channels,
            kernel_size=1,
            padding="same",
            use_bias=False,
            activation=None,
            name=prefix + "expand",
        )(x)
        x = layers.BatchNormalization(
            axis=channel_axis,
            epsilon=1e-3,
            momentum=0.999,
            name=prefix + "expand_BN",
        )(x)
        x = layers.ReLU(6.0, name=prefix + "expand_relu")(x)
    else:
        # The first block (block_id == 0) skips the expansion step.
        prefix = "expanded_conv_"

    # Depthwise 3x3 convolution.
    if stride == 2:
        x = layers.ZeroPadding2D(
            padding=imagenet_utils.correct_pad(x, 3), name=prefix + "pad"
        )(x)
    x = layers.DepthwiseConv2D(
        kernel_size=3,
        strides=stride,
        activation=None,
        use_bias=False,
        padding="same" if stride == 1 else "valid",
        name=prefix + "depthwise",
    )(x)
    x = layers.BatchNormalization(
        axis=channel_axis,
        epsilon=1e-3,
        momentum=0.999,
        name=prefix + "depthwise_BN",
    )(x)
    x = layers.ReLU(6.0, name=prefix + "depthwise_relu")(x)

    # Project with a pointwise 1x1 convolution.
    x = layers.Conv2D(
        pointwise_filters,
        kernel_size=1,
        padding="same",
        use_bias=False,
        activation=None,
        name=prefix + "project",
    )(x)
    x = layers.BatchNormalization(
        axis=channel_axis,
        epsilon=1e-3,
        momentum=0.999,
        name=prefix + "project_BN",
    )(x)

    # Residual connection only when input and output shapes match.
    if in_channels == pointwise_filters and stride == 1:
        return layers.Add(name=prefix + "add")([inputs, x])
    return x
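
# Shape walkthrough (illustrative values, channels_last, alpha=1.0): for
# block_id=1 with 16 input channels, stride=2, and filters=24, the block
# expands to 6 * 16 = 96 channels with the 1x1 conv, halves the spatial dims
# with the strided 3x3 depthwise conv, and projects down to 24 channels;
# since stride != 1, no residual add is applied.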


def _make_divisible(v, divisor, min_value=None):
    """Rounds `v` to the nearest multiple of `divisor` (at least `min_value`),
    without dropping more than 10% below `v`."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
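
# For illustration: with alpha=0.35 the stem conv asks for 32 * 0.35 = 11.2
# filters; _make_divisible(11.2, 8) first rounds to the nearest multiple of 8
# (which is 8), then bumps the result to 16 because 8 < 0.9 * 11.2, keeping
# the layer from shrinking by more than ~10%.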


@keras_export("keras.applications.mobilenet_v2.preprocess_input")
def preprocess_input(x, data_format=None):
    return imagenet_utils.preprocess_input(
        x, data_format=data_format, mode="tf"
    )


@keras_export("keras.applications.mobilenet_v2.decode_predictions")
def decode_predictions(preds, top=5):
    return imagenet_utils.decode_predictions(preds, top=top)


preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode="",
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__