Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/applications/mobilenet_v2.py: 13%
170 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
16"""MobileNet v2 models for Keras.
18MobileNetV2 is a general architecture and can be used for multiple use cases.
19Depending on the use case, it can use different input layer size and
20different width factors. This allows different width models to reduce
21the number of multiply-adds and thereby
22reduce inference cost on mobile devices.
24MobileNetV2 is very similar to the original MobileNet,
25except that it uses inverted residual blocks with
26bottlenecking features. It has a drastically lower
27parameter count than the original MobileNet.
28MobileNets support any input size greater
29than 32 x 32, with larger image sizes
30offering better performance.
32The number of parameters and number of multiply-adds
33can be modified by using the `alpha` parameter,
34which increases/decreases the number of filters in each layer.
35By altering the image size and `alpha` parameter,
36all 22 models from the paper can be built, with ImageNet weights provided.
38The paper demonstrates the performance of MobileNets using `alpha` values of
390.35, 0.5, 0.75, 1.0 (also called 100 % MobileNet), 1.3, and 1.4.
40For each of these `alpha` values, weights for 5 different input image sizes
41are provided (224, 192, 160, 128, and 96).
43The following table describes the performance of
44MobileNet on various input sizes:
45------------------------------------------------------------------------
46MACs stands for Multiply Adds
47Classification Checkpoint|MACs (M)|Parameters (M)|Top 1 Accuracy|Top 5 Accuracy
48--------------------------|------------|---------------|---------|------------
49| [mobilenet_v2_1.4_224] | 582 | 6.06 | 75.0 | 92.5 |
50| [mobilenet_v2_1.3_224] | 509 | 5.34 | 74.4 | 92.1 |
51| [mobilenet_v2_1.0_224] | 300 | 3.47 | 71.8 | 91.0 |
52| [mobilenet_v2_1.0_192] | 221 | 3.47 | 70.7 | 90.1 |
53| [mobilenet_v2_1.0_160] | 154 | 3.47 | 68.8 | 89.0 |
54| [mobilenet_v2_1.0_128] | 99 | 3.47 | 65.3 | 86.9 |
55| [mobilenet_v2_1.0_96] | 56 | 3.47 | 60.3 | 83.2 |
56| [mobilenet_v2_0.75_224] | 209 | 2.61 | 69.8 | 89.6 |
57| [mobilenet_v2_0.75_192] | 153 | 2.61 | 68.7 | 88.9 |
58| [mobilenet_v2_0.75_160] | 107 | 2.61 | 66.4 | 87.3 |
59| [mobilenet_v2_0.75_128] | 69 | 2.61 | 63.2 | 85.3 |
60| [mobilenet_v2_0.75_96] | 39 | 2.61 | 58.8 | 81.6 |
61| [mobilenet_v2_0.5_224] | 97 | 1.95 | 65.4 | 86.4 |
62| [mobilenet_v2_0.5_192] | 71 | 1.95 | 63.9 | 85.4 |
63| [mobilenet_v2_0.5_160] | 50 | 1.95 | 61.0 | 83.2 |
64| [mobilenet_v2_0.5_128] | 32 | 1.95 | 57.7 | 80.8 |
65| [mobilenet_v2_0.5_96] | 18 | 1.95 | 51.2 | 75.8 |
66| [mobilenet_v2_0.35_224] | 59 | 1.66 | 60.3 | 82.9 |
67| [mobilenet_v2_0.35_192] | 43 | 1.66 | 58.2 | 81.2 |
68| [mobilenet_v2_0.35_160] | 30 | 1.66 | 55.7 | 79.1 |
69| [mobilenet_v2_0.35_128] | 20 | 1.66 | 50.8 | 75.0 |
70| [mobilenet_v2_0.35_96] | 11 | 1.66 | 45.5 | 70.4 |
72 Reference:
73 - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
74 https://arxiv.org/abs/1801.04381) (CVPR 2018)
75"""
77import tensorflow.compat.v2 as tf
79from keras.src import backend
80from keras.src.applications import imagenet_utils
81from keras.src.engine import training
82from keras.src.layers import VersionAwareLayers
83from keras.src.utils import data_utils
84from keras.src.utils import layer_utils
86# isort: off
87from tensorflow.python.platform import tf_logging as logging
88from tensorflow.python.util.tf_export import keras_export
# Root URL under which the pre-trained MobileNetV2 weight files are hosted.
BASE_WEIGHT_PATH = (
    "https://storage.googleapis.com/tensorflow/"
    "keras-applications/mobilenet_v2/"
)

# Placeholder for the layer namespace; `MobileNetV2` rebinds this global on
# every call and `_inverted_res_block` reads it.
layers = None
@keras_export(
    "keras.applications.mobilenet_v2.MobileNetV2",
    "keras.applications.MobileNetV2",
)
def MobileNetV2(
    input_shape=None,
    alpha=1.0,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    **kwargs,
):
    """Instantiates the MobileNetV2 architecture.

    MobileNetV2 is very similar to the original MobileNet,
    except that it uses inverted residual blocks with
    bottlenecking features. It has a drastically lower
    parameter count than the original MobileNet.
    MobileNets support any input size greater
    than 32 x 32, with larger image sizes
    offering better performance.

    Reference:
    - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
        https://arxiv.org/abs/1801.04381) (CVPR 2018)

    This function returns a Keras image classification model,
    optionally loaded with weights pre-trained on ImageNet.

    For image classification use cases, see
    [this page for detailed examples](
      https://keras.io/api/applications/#usage-examples-for-image-classification-models).

    For transfer learning use cases, make sure to read the
    [guide to transfer learning & fine-tuning](
      https://keras.io/guides/transfer_learning/).

    Note: each Keras Application expects a specific kind of input
    preprocessing. For MobileNetV2, call
    `tf.keras.applications.mobilenet_v2.preprocess_input`
    on your inputs before passing them to the model.
    `mobilenet_v2.preprocess_input` will scale input pixels between -1 and 1.

    Args:
      input_shape: Optional shape tuple, to be specified if you would
        like to use a model with an input image resolution that is not
        (224, 224, 3).
        It should have exactly 3 input channels (224, 224, 3).
        You can also omit this option if you would like
        to infer input_shape from an input_tensor.
        If you choose to include both input_tensor and input_shape then
        input_shape will be used if they match, if the shapes
        do not match then we will throw an error.
        E.g. `(160, 160, 3)` would be one valid value.
      alpha: Float, larger than zero, controls the width of the network.
        This is known as the width multiplier in the MobileNetV2 paper, but
        the name is kept for consistency with `applications.MobileNetV1`
        model in Keras.
        - If `alpha` < 1.0, proportionally decreases the number
            of filters in each layer.
        - If `alpha` > 1.0, proportionally increases the number
            of filters in each layer.
        - If `alpha` = 1.0, default number of filters from the paper
            are used at each layer.
      include_top: Boolean, whether to include the fully-connected layer at
        the top of the network. Defaults to `True`.
      weights: String, one of `None` (random initialization), 'imagenet'
        (pre-training on ImageNet), or the path to the weights file to be
        loaded.
      input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`)
        to use as image input for the model.
      pooling: String, optional pooling mode for feature extraction when
        `include_top` is `False`.
        - `None` means that the output of the model
            will be the 4D tensor output of the
            last convolutional block.
        - `avg` means that global average pooling
            will be applied to the output of the
            last convolutional block, and thus
            the output of the model will be a
            2D tensor.
        - `max` means that global max pooling will
            be applied.
      classes: Optional integer number of classes to classify images into,
        only to be specified if `include_top` is True, and if no `weights`
        argument is specified.
      classifier_activation: A `str` or callable. The activation function to
        use on the "top" layer. Ignored unless `include_top=True`. Set
        `classifier_activation=None` to return the logits of the "top"
        layer. When loading pretrained weights, `classifier_activation` can
        only be `None` or `"softmax"`.
      **kwargs: For backwards compatibility only. The only key consumed is
        `layers`, which replaces the default `VersionAwareLayers()`
        namespace used to build the model.

    Returns:
      A `keras.Model` instance.

    Raises:
      ValueError: If unknown keyword arguments are passed; if `weights` is
        neither `None`, `"imagenet"` nor an existing path; if
        `weights="imagenet"` is combined with `include_top=True` and
        `classes != 1000`, or with an unsupported `alpha`; or if
        `input_tensor` is not a valid Keras tensor or disagrees with
        `input_shape`.
    """
    # `_inverted_res_block` reads the module-level `layers`, so it has to be
    # rebound here before any block is built.
    global layers
    if "layers" in kwargs:
        layers = kwargs.pop("layers")
    else:
        layers = VersionAwareLayers()
    if kwargs:
        raise ValueError(f"Unknown argument(s): {kwargs}")
    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded.  "
            f"Received `weights={weights}`"
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            f"as true, `classes` should be 1000. Received `classes={classes}`"
        )

    channels_first = backend.image_data_format() == "channels_first"
    # Square resolutions for which ImageNet checkpoints are published.
    pretrained_sizes = (96, 128, 160, 192, 224)

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    layer_utils.get_source_inputs(input_tensor)
                )
            except ValueError:
                raise ValueError(
                    f"input_tensor: {input_tensor} "
                    "is not type input_tensor. "
                    f"Received `type(input_tensor)={type(input_tensor)}`"
                )
        if not is_input_t_tensor:
            raise ValueError(
                "input_tensor is not a Keras tensor; "
                f"Received `input_tensor={input_tensor}`"
            )
        # `input_tensor` carries a leading batch axis that `input_shape`
        # lacks, so tensor axis 2 and input_shape axis 1 name the same
        # spatial dimension in both layouts (cols for channels_last, rows
        # for channels_first). The previous channels_first check compared
        # tensor axis 1 (channels) with input_shape[1] (rows) and therefore
        # rejected matching inputs.
        if backend.int_shape(input_tensor)[2] != input_shape[1]:
            raise ValueError(
                "input_tensor.shape[2] must equal input_shape[1]; "
                "Received `input_tensor.shape="
                f"{input_tensor.shape}`, "
                f"`input_shape={input_shape}`"
            )

    if input_shape is None and input_tensor is not None:
        # If input_shape is None, infer shape from input_tensor.
        try:
            tensor_is_keras = backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError(
                "input_tensor must be a valid Keras tensor type; "
                f"Received {input_tensor} of type {type(input_tensor)}"
            )

        if not tensor_is_keras:
            default_size = 224
        else:
            tensor_shape = backend.int_shape(input_tensor)
            if channels_first:
                rows, cols = tensor_shape[2], tensor_shape[3]
            else:
                rows, cols = tensor_shape[1], tensor_shape[2]

            if rows == cols and rows in pretrained_sizes:
                default_size = rows
            else:
                default_size = 224

    # If input_shape is None and no input_tensor
    elif input_shape is None:
        default_size = 224

    # If input_shape is not None, assume default size.
    else:
        if channels_first:
            rows, cols = input_shape[1], input_shape[2]
        else:
            rows, cols = input_shape[0], input_shape[1]

        if rows == cols and rows in pretrained_sizes:
            default_size = rows
        else:
            default_size = 224

    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if backend.image_data_format() == "channels_last":
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == "imagenet":
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError(
                "If imagenet weights are being loaded, "
                "alpha must be one of `0.35`, `0.50`, `0.75`, "
                "`1.0`, `1.3` or `1.4` only;"
                f" Received `alpha={alpha}`"
            )

        if rows != cols or rows not in pretrained_sizes:
            # `rows` selects the checkpoint file below, so fall back to the
            # 224 weights when no exact match exists.
            rows = 224
            logging.warning(
                "`input_shape` is undefined or non-square, "
                "or `rows` is not in [96, 128, 160, 192, 224]. "
                "Weights for input shape (224, 224) will be "
                "loaded as the default."
            )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1

    # Stem: 3x3 stride-2 convolution.
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.Conv2D(
        first_block_filters,
        kernel_size=3,
        strides=(2, 2),
        padding="same",
        use_bias=False,
        name="Conv1",
    )(img_input)
    x = layers.BatchNormalization(
        axis=channel_axis, epsilon=1e-3, momentum=0.999, name="bn_Conv1"
    )(x)
    x = layers.ReLU(6.0, name="Conv1_relu")(x)

    # (filters, stride, expansion) for blocks 0..16, in order. The position
    # in this table is the `block_id`, which determines the layer names.
    block_specs = (
        (16, 1, 1),
        (24, 2, 6),
        (24, 1, 6),
        (32, 2, 6),
        (32, 1, 6),
        (32, 1, 6),
        (64, 2, 6),
        (64, 1, 6),
        (64, 1, 6),
        (64, 1, 6),
        (96, 1, 6),
        (96, 1, 6),
        (96, 1, 6),
        (160, 2, 6),
        (160, 1, 6),
        (160, 1, 6),
        (320, 1, 6),
    )
    for block_id, (filters, stride, expansion) in enumerate(block_specs):
        x = _inverted_res_block(
            x,
            filters=filters,
            alpha=alpha,
            stride=stride,
            expansion=expansion,
            block_id=block_id,
        )

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we increase the number of
    # output channels.
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = layers.Conv2D(
        last_block_filters, kernel_size=1, use_bias=False, name="Conv_1"
    )(x)
    x = layers.BatchNormalization(
        axis=channel_axis, epsilon=1e-3, momentum=0.999, name="Conv_1_bn"
    )(x)
    x = layers.ReLU(6.0, name="out_relu")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(
            classes, activation=classifier_activation, name="predictions"
        )(x)

    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account any potential predecessors of
    # `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=f"mobilenetv2_{alpha:0.2f}_{rows}")

    # Load weights.
    if weights == "imagenet":
        # The two checkpoint variants differ only by the `_no_top` suffix.
        top_suffix = "" if include_top else "_no_top"
        model_name = (
            "mobilenet_v2_weights_tf_dim_ordering_tf_kernels_"
            + str(float(alpha))
            + "_"
            + str(rows)
            + top_suffix
            + ".h5"
        )
        weight_path = BASE_WEIGHT_PATH + model_name
        weights_path = data_utils.get_file(
            model_name, weight_path, cache_subdir="models"
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
    """Build one inverted residual block: expand -> depthwise -> project.

    Args:
      inputs: Input tensor of the block.
      expansion: Multiplier applied to the input channel count to size the
        1x1 expansion convolution.
      stride: Stride of the 3x3 depthwise convolution. A stride of 2 adds an
        explicit zero-padding layer and switches the depthwise convolution to
        "valid" padding.
      alpha: Width multiplier applied to `filters`.
      filters: Base number of output channels before `alpha` scaling.
      block_id: Block index used in layer names. A falsy value (block 0)
        skips the expansion stage and uses the "expanded_conv_" name prefix.

    Returns:
      The block's output tensor. It is the sum of `inputs` and the projected
      tensor when `stride == 1` and the channel counts match, otherwise the
      projected tensor alone.
    """
    bn_axis = 1 if backend.image_data_format() == "channels_first" else -1
    input_channels = backend.int_shape(inputs)[bn_axis]

    # Ensure the number of filters on the last 1x1 convolution is divisible
    # by 8.
    project_channels = _make_divisible(int(filters * alpha), 8)

    def batch_norm(layer_name):
        # All batch-norm layers in the block share the same hyperparameters.
        return layers.BatchNormalization(
            axis=bn_axis, epsilon=1e-3, momentum=0.999, name=layer_name
        )

    def relu6(layer_name):
        return layers.ReLU(6.0, name=layer_name)

    y = inputs
    if not block_id:
        tag = "expanded_conv_"
    else:
        tag = f"block_{block_id}_"
        # Expand with a pointwise 1x1 convolution.
        y = layers.Conv2D(
            expansion * input_channels,
            kernel_size=1,
            padding="same",
            use_bias=False,
            activation=None,
            name=tag + "expand",
        )(y)
        y = batch_norm(tag + "expand_BN")(y)
        y = relu6(tag + "expand_relu")(y)

    # Depthwise 3x3 convolution.
    if stride == 2:
        y = layers.ZeroPadding2D(
            padding=imagenet_utils.correct_pad(y, 3), name=tag + "pad"
        )(y)
    depthwise_padding = "same" if stride == 1 else "valid"
    y = layers.DepthwiseConv2D(
        kernel_size=3,
        strides=stride,
        activation=None,
        use_bias=False,
        padding=depthwise_padding,
        name=tag + "depthwise",
    )(y)
    y = batch_norm(tag + "depthwise_BN")(y)
    y = relu6(tag + "depthwise_relu")(y)

    # Project with a pointwise 1x1 convolution.
    y = layers.Conv2D(
        project_channels,
        kernel_size=1,
        padding="same",
        use_bias=False,
        activation=None,
        name=tag + "project",
    )(y)
    y = batch_norm(tag + "project_BN")(y)

    # The skip connection is only added when input and output channel counts
    # agree and the block does not downsample.
    use_residual = input_channels == project_channels and stride == 1
    if not use_residual:
        return y
    return layers.Add(name=tag + "add")([inputs, y])
def _make_divisible(v, divisor, min_value=None):
    """Round `v` to a nearby multiple of `divisor`.

    Args:
      v: Value to round.
      divisor: The result is built from whole multiples of this number.
      min_value: Lower bound for the result. Defaults to `divisor`.

    Returns:
      `v` rounded to the nearest multiple of `divisor` (halves round up),
      clamped to at least `min_value`, and bumped up by one `divisor` step if
      the result would otherwise fall below 90% of `v`.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Make sure that round down does not go down by more than 10%.
    return rounded + divisor if rounded < 0.9 * v else rounded
@keras_export("keras.applications.mobilenet_v2.preprocess_input")
def preprocess_input(x, data_format=None):
    # Delegates to the shared ImageNet helper in "tf" mode. The public
    # docstring is assigned to `__doc__` after the definition.
    preprocessed = imagenet_utils.preprocess_input(
        x, mode="tf", data_format=data_format
    )
    return preprocessed
@keras_export("keras.applications.mobilenet_v2.decode_predictions")
def decode_predictions(preds, top=5):
    # Thin wrapper: forwards both arguments to the shared ImageNet decoder.
    decoded = imagenet_utils.decode_predictions(preds, top=top)
    return decoded
# Both public wrappers take their docstrings from `imagenet_utils`: the
# decoder's is copied as-is, the preprocessing one is filled in from the
# shared template.
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__
preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    mode="",
)