# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""EfficientNet V2 models for Keras.

Reference:
- [EfficientNetV2: Smaller Models and Faster Training](
    https://arxiv.org/abs/2104.00298) (ICML 2021)
"""

import copy
import math

import tensorflow.compat.v2 as tf

from keras.src import backend
from keras.src import layers
from keras.src.applications import imagenet_utils
from keras.src.engine import training
from keras.src.utils import data_utils
from keras.src.utils import layer_utils

# isort: off
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHTS_PATH = "https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/"  # noqa: E501

WEIGHTS_HASHES = {
    "b0": (
        "21ecbf6da12460d5c40bb2f29ceb2188",
        "893217f2bb855e2983157299931e43ff",
    ),
    "b1": (
        "069f0534ff22adf035c89e2d9547a9dc",
        "0e80663031ca32d657f9caa404b6ec37",
    ),
    "b2": (
        "424e49f28180edbde1e94797771950a7",
        "1dfe2e7a5d45b6632553a8961ea609eb",
    ),
    "b3": (
        "1f1fc43bd98a6e4fd8fdfd551e02c7a0",
        "f6abf7b5849ac99a89b50dd3fd532856",
    ),
    "-s": (
        "e1d88a8495beba45748fedd0cecbe016",
        "af0682fb74e8c54910f2d4393339c070",
    ),
    "-m": (
        "a3bf6aa3276309f4fc6a34aa114c95cd",
        "1b8dc055df72dde80d614482840fe342",
    ),
    "-l": (
        "27e6d408b53c7ebc868fefa357689935",
        "b0b66b5c863aef5b46e8608fe1711615",
    ),
}
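
# Note: the keys above are the last two characters of each model name, e.g.
# "efficientnetv2-b0"[-2:] == "b0" and "efficientnetv2-s"[-2:] == "-s". The
# weight-loading code at the end of EfficientNetV2() relies on this to look
# up the (top, notop) file hashes.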

DEFAULT_BLOCKS_ARGS = {
    "efficientnetv2-s": [
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 24,
            "output_filters": 24,
            "expand_ratio": 1,
            "se_ratio": 0.0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 4,
            "input_filters": 24,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0.0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "conv_type": 1,
            "expand_ratio": 4,
            "input_filters": 48,
            "kernel_size": 3,
            "num_repeat": 4,
            "output_filters": 64,
            "se_ratio": 0,
            "strides": 2,
        },
        {
            "conv_type": 0,
            "expand_ratio": 4,
            "input_filters": 64,
            "kernel_size": 3,
            "num_repeat": 6,
            "output_filters": 128,
            "se_ratio": 0.25,
            "strides": 2,
        },
        {
            "conv_type": 0,
            "expand_ratio": 6,
            "input_filters": 128,
            "kernel_size": 3,
            "num_repeat": 9,
            "output_filters": 160,
            "se_ratio": 0.25,
            "strides": 1,
        },
        {
            "conv_type": 0,
            "expand_ratio": 6,
            "input_filters": 160,
            "kernel_size": 3,
            "num_repeat": 15,
            "output_filters": 256,
            "se_ratio": 0.25,
            "strides": 2,
        },
    ],
    "efficientnetv2-m": [
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 24,
            "output_filters": 24,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 24,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 48,
            "output_filters": 80,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 7,
            "input_filters": 80,
            "output_filters": 160,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 14,
            "input_filters": 160,
            "output_filters": 176,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 18,
            "input_filters": 176,
            "output_filters": 304,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 304,
            "output_filters": 512,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-l": [
        {
            "kernel_size": 3,
            "num_repeat": 4,
            "input_filters": 32,
            "output_filters": 32,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 7,
            "input_filters": 32,
            "output_filters": 64,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 7,
            "input_filters": 64,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 10,
            "input_filters": 96,
            "output_filters": 192,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 19,
            "input_filters": 192,
            "output_filters": 224,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 25,
            "input_filters": 224,
            "output_filters": 384,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 7,
            "input_filters": 384,
            "output_filters": 640,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-b0": [
        {
            "kernel_size": 3,
            "num_repeat": 1,
            "input_filters": 32,
            "output_filters": 16,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 16,
            "output_filters": 32,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 32,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 48,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 96,
            "output_filters": 112,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 8,
            "input_filters": 112,
            "output_filters": 192,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-b1": [
        {
            "kernel_size": 3,
            "num_repeat": 1,
            "input_filters": 32,
            "output_filters": 16,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 16,
            "output_filters": 32,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 32,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 48,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 96,
            "output_filters": 112,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 8,
            "input_filters": 112,
            "output_filters": 192,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-b2": [
        {
            "kernel_size": 3,
            "num_repeat": 1,
            "input_filters": 32,
            "output_filters": 16,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 16,
            "output_filters": 32,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 32,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 48,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 96,
            "output_filters": 112,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 8,
            "input_filters": 112,
            "output_filters": 192,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-b3": [
        {
            "kernel_size": 3,
            "num_repeat": 1,
            "input_filters": 32,
            "output_filters": 16,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 16,
            "output_filters": 32,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 32,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 48,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 96,
            "output_filters": 112,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 8,
            "input_filters": 112,
            "output_filters": 192,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
    ],
}
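
# Each dict above configures one stage of the network: "conv_type" selects
# the block implementation (0 -> MBConvBlock, 1 -> FusedMBConvBlock), while
# "num_repeat" is scaled by round_repeats() and the filter counts by
# round_filters() before the stage is unrolled in EfficientNetV2() below.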

CONV_KERNEL_INITIALIZER = {
    "class_name": "VarianceScaling",
    "config": {
        "scale": 2.0,
        "mode": "fan_out",
        "distribution": "truncated_normal",
    },
}

DENSE_KERNEL_INITIALIZER = {
    "class_name": "VarianceScaling",
    "config": {
        "scale": 1.0 / 3.0,
        "mode": "fan_out",
        "distribution": "uniform",
    },
}
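
# For illustration, these dicts deserialize through the standard Keras
# initializer registry when passed as `kernel_initializer`, e.g.:
#
#   initializer = tf.keras.initializers.get(CONV_KERNEL_INITIALIZER)
#   # -> a VarianceScaling(scale=2.0, mode="fan_out",
#   #                      distribution="truncated_normal") instance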

BASE_DOCSTRING = """Instantiates the {name} architecture.

  Reference:
  - [EfficientNetV2: Smaller Models and Faster Training](
      https://arxiv.org/abs/2104.00298) (ICML 2021)

  This function returns a Keras image classification model,
  optionally loaded with weights pre-trained on ImageNet.

  For image classification use cases, see
  [this page for detailed examples](
    https://keras.io/api/applications/#usage-examples-for-image-classification-models).

  For transfer learning use cases, make sure to read the
  [guide to transfer learning & fine-tuning](
    https://keras.io/guides/transfer_learning/).

  Note: each Keras Application expects a specific kind of input preprocessing.
  For EfficientNetV2, by default input preprocessing is included as a part of
  the model (as a `Rescaling` layer), and thus
  `tf.keras.applications.efficientnet_v2.preprocess_input` is actually a
  pass-through function. In this use case, EfficientNetV2 models expect their
  inputs to be float tensors of pixels with values in the [0, 255] range.
  At the same time, preprocessing as a part of the model (i.e. the `Rescaling`
  layer) can be disabled by setting the `include_preprocessing` argument to
  False. With preprocessing disabled, EfficientNetV2 models expect their
  inputs to be float tensors of pixels with values in the [-1, 1] range.

  Args:
    include_top: Boolean, whether to include the fully-connected
      layer at the top of the network. Defaults to True.
    weights: One of `None` (random initialization),
      `"imagenet"` (pre-training on ImageNet),
      or the path to the weights file to be loaded. Defaults to `"imagenet"`.
    input_tensor: Optional Keras tensor
      (i.e. output of `layers.Input()`)
      to use as image input for the model.
    input_shape: Optional shape tuple, only to be specified
      if `include_top` is False.
      It should have exactly 3 input channels.
    pooling: Optional pooling mode for feature extraction
      when `include_top` is `False`. Defaults to None.
      - `None` means that the output of the model will be
        the 4D tensor output of the
        last convolutional layer.
      - `"avg"` means that global average pooling
        will be applied to the output of the
        last convolutional layer, and thus
        the output of the model will be a 2D tensor.
      - `"max"` means that global max pooling will
        be applied.
    classes: Optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified. Defaults to 1000 (number of
      ImageNet classes).
    classifier_activation: A string or callable. The activation function to
      use on the `"top"` layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the `"top"` layer.
      Defaults to `"softmax"`.
      When loading pretrained weights, `classifier_activation` can only
      be `None` or `"softmax"`.

  Returns:
    A `keras.Model` instance.
"""


def round_filters(filters, width_coefficient, min_depth, depth_divisor):
    """Round number of filters based on depth multiplier."""
    filters *= width_coefficient
    minimum_depth = min_depth or depth_divisor
    new_filters = max(
        minimum_depth,
        int(filters + depth_divisor / 2) // depth_divisor * depth_divisor,
    )
    return int(new_filters)
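
# For example, with width_coefficient=1.1, min_depth=8, depth_divisor=8:
#   round_filters(24, 1.1, 8, 8)
#   # 24 * 1.1 = 26.4; int(26.4 + 8 / 2) // 8 * 8 = 24; max(8, 24) -> 24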


def round_repeats(repeats, depth_coefficient):
    """Round number of repeats based on depth multiplier."""
    return int(math.ceil(depth_coefficient * repeats))
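
# For example: round_repeats(4, 1.4) == math.ceil(5.6) == 6, so a stage
# listed with num_repeat=4 unrolls to 6 blocks at depth_coefficient=1.4.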


def MBConvBlock(
    input_filters: int,
    output_filters: int,
    expand_ratio=1,
    kernel_size=3,
    strides=1,
    se_ratio=0.0,
    bn_momentum=0.9,
    activation="swish",
    survival_probability: float = 0.8,
    name=None,
):
    """MBConv block: Mobile Inverted Residual Bottleneck."""
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    if name is None:
        name = backend.get_uid("block0")

    def apply(inputs):
        # Expansion phase
        filters = input_filters * expand_ratio
        if expand_ratio != 1:
            x = layers.Conv2D(
                filters=filters,
                kernel_size=1,
                strides=1,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                padding="same",
                data_format="channels_last",
                use_bias=False,
                name=name + "expand_conv",
            )(inputs)
            x = layers.BatchNormalization(
                axis=bn_axis,
                momentum=bn_momentum,
                name=name + "expand_bn",
            )(x)
            x = layers.Activation(activation, name=name + "expand_activation")(
                x
            )
        else:
            x = inputs

        # Depthwise conv
        x = layers.DepthwiseConv2D(
            kernel_size=kernel_size,
            strides=strides,
            depthwise_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            data_format="channels_last",
            use_bias=False,
            name=name + "dwconv2",
        )(x)
        x = layers.BatchNormalization(
            axis=bn_axis, momentum=bn_momentum, name=name + "bn"
        )(x)
        x = layers.Activation(activation, name=name + "activation")(x)

        # Squeeze and excite
        if 0 < se_ratio <= 1:
            filters_se = max(1, int(input_filters * se_ratio))
            se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
            if bn_axis == 1:
                se_shape = (filters, 1, 1)
            else:
                se_shape = (1, 1, filters)
            se = layers.Reshape(se_shape, name=name + "se_reshape")(se)

            se = layers.Conv2D(
                filters_se,
                1,
                padding="same",
                activation=activation,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_reduce",
            )(se)
            se = layers.Conv2D(
                filters,
                1,
                padding="same",
                activation="sigmoid",
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_expand",
            )(se)

            x = layers.multiply([x, se], name=name + "se_excite")

        # Output phase
        x = layers.Conv2D(
            filters=output_filters,
            kernel_size=1,
            strides=1,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            data_format="channels_last",
            use_bias=False,
            name=name + "project_conv",
        )(x)
        x = layers.BatchNormalization(
            axis=bn_axis, momentum=bn_momentum, name=name + "project_bn"
        )(x)

        if strides == 1 and input_filters == output_filters:
            if survival_probability:
                x = layers.Dropout(
                    survival_probability,
                    noise_shape=(None, 1, 1, 1),
                    name=name + "drop",
                )(x)
            x = layers.add([x, inputs], name=name + "add")

        return x

    return apply
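
# A minimal usage sketch (not executed here; shapes assume channels_last and
# the "demo_" name prefix is arbitrary):
#
#   features = layers.Input(shape=(56, 56, 24))
#   outputs = MBConvBlock(
#       input_filters=24, output_filters=24, expand_ratio=4, name="demo_"
#   )(features)
#   # strides == 1 and input_filters == output_filters, so the block wraps
#   # its output in the stochastic-depth residual branch.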


def FusedMBConvBlock(
    input_filters: int,
    output_filters: int,
    expand_ratio=1,
    kernel_size=3,
    strides=1,
    se_ratio=0.0,
    bn_momentum=0.9,
    activation="swish",
    survival_probability: float = 0.8,
    name=None,
):
    """Fused MBConv block: fuses the expansion conv1x1 and depthwise_conv
    into a single conv2d."""
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    if name is None:
        name = backend.get_uid("block0")

    def apply(inputs):
        filters = input_filters * expand_ratio
        if expand_ratio != 1:
            x = layers.Conv2D(
                filters,
                kernel_size=kernel_size,
                strides=strides,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                data_format="channels_last",
                padding="same",
                use_bias=False,
                name=name + "expand_conv",
            )(inputs)
            x = layers.BatchNormalization(
                axis=bn_axis, momentum=bn_momentum, name=name + "expand_bn"
            )(x)
            x = layers.Activation(
                activation=activation, name=name + "expand_activation"
            )(x)
        else:
            x = inputs

        # Squeeze and excite
        if 0 < se_ratio <= 1:
            filters_se = max(1, int(input_filters * se_ratio))
            se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
            if bn_axis == 1:
                se_shape = (filters, 1, 1)
            else:
                se_shape = (1, 1, filters)

            se = layers.Reshape(se_shape, name=name + "se_reshape")(se)

            se = layers.Conv2D(
                filters_se,
                1,
                padding="same",
                activation=activation,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_reduce",
            )(se)
            se = layers.Conv2D(
                filters,
                1,
                padding="same",
                activation="sigmoid",
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_expand",
            )(se)

            x = layers.multiply([x, se], name=name + "se_excite")

        # Output phase:
        x = layers.Conv2D(
            output_filters,
            kernel_size=1 if expand_ratio != 1 else kernel_size,
            strides=1 if expand_ratio != 1 else strides,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name=name + "project_conv",
        )(x)
        x = layers.BatchNormalization(
            axis=bn_axis, momentum=bn_momentum, name=name + "project_bn"
        )(x)
        if expand_ratio == 1:
            x = layers.Activation(
                activation=activation, name=name + "project_activation"
            )(x)

        # Residual:
        if strides == 1 and input_filters == output_filters:
            if survival_probability:
                x = layers.Dropout(
                    survival_probability,
                    noise_shape=(None, 1, 1, 1),
                    name=name + "drop",
                )(x)
            x = layers.add([x, inputs], name=name + "add")
        return x

    return apply
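
# Usage mirrors MBConvBlock above. The difference: for expand_ratio != 1 the
# full kernel_size/strided conv happens in the expansion and the projection
# is a 1x1 conv, while for expand_ratio == 1 a single conv carries the
# kernel size, strides, and activation.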


def EfficientNetV2(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    min_depth=8,
    bn_momentum=0.9,
    activation="swish",
    blocks_args="default",
    model_name="efficientnetv2",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    """Instantiates the EfficientNetV2 architecture using given scaling
    coefficients.

    Args:
      width_coefficient: float, scaling coefficient for network width.
      depth_coefficient: float, scaling coefficient for network depth.
      default_size: integer, default input image size.
      dropout_rate: float, dropout rate before final classifier layer.
      drop_connect_rate: float, dropout rate at skip connections.
      depth_divisor: integer, a unit of network width.
      min_depth: integer, minimum number of filters.
      bn_momentum: float. Momentum parameter for Batch Normalization layers.
      activation: activation function.
      blocks_args: list of dicts, parameters to construct block modules.
      model_name: string, model name.
      include_top: whether to include the fully-connected layer at the top of
        the network.
      weights: one of `None` (random initialization), `"imagenet"`
        (pre-training on ImageNet), or the path to the weights file to be
        loaded.
      input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) or
        numpy array to use as image input for the model.
      input_shape: optional shape tuple, only to be specified if `include_top`
        is False. It should have exactly 3 input channels.
      pooling: optional pooling mode for feature extraction when `include_top`
        is `False`.
        - `None` means that the output of the model will be the 4D tensor
          output of the last convolutional layer.
        - `"avg"` means that global average pooling will be applied to the
          output of the last convolutional layer, and thus the output of the
          model will be a 2D tensor.
        - `"max"` means that global max pooling will be applied.
      classes: optional number of classes to classify images into, only to be
        specified if `include_top` is True, and if no `weights` argument is
        specified.
      classifier_activation: A string or callable. The activation function to
        use on the `"top"` layer. Ignored unless `include_top=True`. Set
        `classifier_activation=None` to return the logits of the `"top"`
        layer.
      include_preprocessing: Boolean, whether to include the preprocessing
        layer (`Rescaling`) at the bottom of the network. Defaults to `True`.

    Returns:
      A `keras.Model` instance.

    Raises:
      ValueError: in case of invalid argument for `weights`,
        or invalid input shape.
      ValueError: if `classifier_activation` is not `"softmax"` or `None` when
        using a pretrained top layer.
    """
    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS[model_name]

    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded. "
            f"Received: weights={weights}"
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights` as `'imagenet'` with `include_top`"
            " as true, `classes` should be 1000. "
            f"Received: classes={classes}"
        )

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        # Apply original V1 preprocessing for Bx variants
        # if number of channels allows it
        num_channels = input_shape[bn_axis - 1]
        if model_name.split("-")[-1].startswith("b") and num_channels == 3:
            x = layers.Rescaling(scale=1.0 / 255)(x)
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)
        else:
            x = layers.Rescaling(scale=1.0 / 128.0, offset=-1)(x)

    # Build stem
    stem_filters = round_filters(
        filters=blocks_args[0]["input_filters"],
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=stem_filters,
        kernel_size=3,
        strides=2,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        use_bias=False,
        name="stem_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="stem_bn",
    )(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Build blocks
    blocks_args = copy.deepcopy(blocks_args)
    b = 0
    blocks = float(sum(args["num_repeat"] for args in blocks_args))

    for i, args in enumerate(blocks_args):
        assert args["num_repeat"] > 0

        # Update block input and output filters based on depth multiplier.
        args["input_filters"] = round_filters(
            filters=args["input_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor,
        )
        args["output_filters"] = round_filters(
            filters=args["output_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor,
        )

        # Determine which conv type to use:
        block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")]
        repeats = round_repeats(
            repeats=args.pop("num_repeat"), depth_coefficient=depth_coefficient
        )
        for j in range(repeats):
            # The first block needs to take care of stride and filter size
            # increase.
            if j > 0:
                args["strides"] = 1
                args["input_filters"] = args["output_filters"]

            x = block(
                activation=activation,
                bn_momentum=bn_momentum,
                survival_probability=drop_connect_rate * b / blocks,
                name=f"block{i + 1}{chr(j + 97)}_",
                **args,
            )(x)
            b += 1

    # Build top
    top_filters = round_filters(
        filters=1280,
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=top_filters,
        kernel_size=1,
        strides=1,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        data_format="channels_last",
        use_bias=False,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="top_bn",
    )(x)
    x = layers.Activation(activation=activation, name="top_activation")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(
            classes,
            activation=classifier_activation,
            kernel_initializer=DENSE_KERNEL_INITIALIZER,
            bias_initializer=tf.constant_initializer(0),
            name="predictions",
        )(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights.
    if weights == "imagenet":
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suffix
        weights_path = data_utils.get_file(
            file_name,
            BASE_WEIGHTS_PATH + file_name,
            cache_subdir="models",
            file_hash=file_hash,
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2B0",
    "keras.applications.EfficientNetV2B0",
)
def EfficientNetV2B0(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=224,
        model_name="efficientnetv2-b0",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )
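
# A minimal classification sketch (assumes TF >= 2.6 image utils and a local
# file "elephant.jpg", which is only an example path; preprocessing is built
# into the model, so no preprocess_input call is needed):
#
#   model = EfficientNetV2B0(weights="imagenet")
#   image = tf.keras.utils.load_img("elephant.jpg", target_size=(224, 224))
#   batch = tf.expand_dims(tf.keras.utils.img_to_array(image), 0)
#   preds = model.predict(batch)
#   print(decode_predictions(preds, top=3))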


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2B1",
    "keras.applications.EfficientNetV2B1",
)
def EfficientNetV2B1(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.1,
        default_size=240,
        model_name="efficientnetv2-b1",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2B2",
    "keras.applications.EfficientNetV2B2",
)
def EfficientNetV2B2(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.1,
        depth_coefficient=1.2,
        default_size=260,
        model_name="efficientnetv2-b2",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2B3",
    "keras.applications.EfficientNetV2B3",
)
def EfficientNetV2B3(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.2,
        depth_coefficient=1.4,
        default_size=300,
        model_name="efficientnetv2-b3",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2S",
    "keras.applications.EfficientNetV2S",
)
def EfficientNetV2S(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=384,
        model_name="efficientnetv2-s",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2M",
    "keras.applications.EfficientNetV2M",
)
def EfficientNetV2M(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=480,
        model_name="efficientnetv2-m",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2L",
    "keras.applications.EfficientNetV2L",
)
def EfficientNetV2L(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=480,
        model_name="efficientnetv2-l",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


EfficientNetV2B0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B0")
EfficientNetV2B1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B1")
EfficientNetV2B2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B2")
EfficientNetV2B3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B3")
EfficientNetV2S.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2S")
EfficientNetV2M.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2M")
EfficientNetV2L.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2L")


@keras_export("keras.applications.efficientnet_v2.preprocess_input")
def preprocess_input(x, data_format=None):
    """A placeholder method for backward compatibility.

    The preprocessing logic has been included in the EfficientNetV2 model
    implementation. Users are no longer required to call this method to
    normalize the input data. This method does nothing and is only kept as a
    placeholder to align the API surface between the old and new versions of
    the model.

    Args:
      x: A floating point `numpy.array` or a `tf.Tensor`.
      data_format: Optional data format of the image tensor/array. Defaults to
        None, in which case the global setting
        `tf.keras.backend.image_data_format()` is used (unless you changed it,
        it defaults to "channels_last").{mode}

    Returns:
      Unchanged `numpy.array` or `tf.Tensor`.
    """
    return x
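
# For illustration, the pass-through behavior:
#
#   images = tf.random.uniform((1, 224, 224, 3), maxval=255.0)
#   assert preprocess_input(images) is images  # returned unchanged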


@keras_export("keras.applications.efficientnet_v2.decode_predictions")
def decode_predictions(preds, top=5):
    return imagenet_utils.decode_predictions(preds, top=top)


decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__
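
# For illustration, with `preds` of shape (batch_size, 1000) from a model
# with a softmax top:
#
#   decode_predictions(preds, top=3)
#   # -> one list per sample of (class_id, class_name, score) tuples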