# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


"""EfficientNet V2 models for Keras.

Reference:
- [EfficientNetV2: Smaller Models and Faster Training](
    https://arxiv.org/abs/2104.00298) (ICML 2021)
"""

import copy
import math

import tensorflow.compat.v2 as tf

from keras.src import backend
from keras.src import layers
from keras.src.applications import imagenet_utils
from keras.src.engine import training
from keras.src.utils import data_utils
from keras.src.utils import layer_utils

# isort: off
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHTS_PATH = "https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/"  # noqa: E501

WEIGHTS_HASHES = {
    "b0": (
        "21ecbf6da12460d5c40bb2f29ceb2188",
        "893217f2bb855e2983157299931e43ff",
    ),
    "b1": (
        "069f0534ff22adf035c89e2d9547a9dc",
        "0e80663031ca32d657f9caa404b6ec37",
    ),
    "b2": (
        "424e49f28180edbde1e94797771950a7",
        "1dfe2e7a5d45b6632553a8961ea609eb",
    ),
    "b3": (
        "1f1fc43bd98a6e4fd8fdfd551e02c7a0",
        "f6abf7b5849ac99a89b50dd3fd532856",
    ),
    "-s": (
        "e1d88a8495beba45748fedd0cecbe016",
        "af0682fb74e8c54910f2d4393339c070",
    ),
    "-m": (
        "a3bf6aa3276309f4fc6a34aa114c95cd",
        "1b8dc055df72dde80d614482840fe342",
    ),
    "-l": (
        "27e6d408b53c7ebc868fefa357689935",
        "b0b66b5c863aef5b46e8608fe1711615",
    ),
}

DEFAULT_BLOCKS_ARGS = {
    "efficientnetv2-s": [
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 24,
            "output_filters": 24,
            "expand_ratio": 1,
            "se_ratio": 0.0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 4,
            "input_filters": 24,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0.0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "conv_type": 1,
            "expand_ratio": 4,
            "input_filters": 48,
            "kernel_size": 3,
            "num_repeat": 4,
            "output_filters": 64,
            "se_ratio": 0,
            "strides": 2,
        },
        {
            "conv_type": 0,
            "expand_ratio": 4,
            "input_filters": 64,
            "kernel_size": 3,
            "num_repeat": 6,
            "output_filters": 128,
            "se_ratio": 0.25,
            "strides": 2,
        },
        {
            "conv_type": 0,
            "expand_ratio": 6,
            "input_filters": 128,
            "kernel_size": 3,
            "num_repeat": 9,
            "output_filters": 160,
            "se_ratio": 0.25,
            "strides": 1,
        },
        {
            "conv_type": 0,
            "expand_ratio": 6,
            "input_filters": 160,
            "kernel_size": 3,
            "num_repeat": 15,
            "output_filters": 256,
            "se_ratio": 0.25,
            "strides": 2,
        },
    ],
    "efficientnetv2-m": [
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 24,
            "output_filters": 24,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 24,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 48,
            "output_filters": 80,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 7,
            "input_filters": 80,
            "output_filters": 160,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 14,
            "input_filters": 160,
            "output_filters": 176,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 18,
            "input_filters": 176,
            "output_filters": 304,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 304,
            "output_filters": 512,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-l": [
        {
            "kernel_size": 3,
            "num_repeat": 4,
            "input_filters": 32,
            "output_filters": 32,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 7,
            "input_filters": 32,
            "output_filters": 64,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 7,
            "input_filters": 64,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 10,
            "input_filters": 96,
            "output_filters": 192,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 19,
            "input_filters": 192,
            "output_filters": 224,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 25,
            "input_filters": 224,
            "output_filters": 384,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 7,
            "input_filters": 384,
            "output_filters": 640,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-b0": [
        {
            "kernel_size": 3,
            "num_repeat": 1,
            "input_filters": 32,
            "output_filters": 16,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 16,
            "output_filters": 32,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 32,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 48,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 96,
            "output_filters": 112,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 8,
            "input_filters": 112,
            "output_filters": 192,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-b1": [
        {
            "kernel_size": 3,
            "num_repeat": 1,
            "input_filters": 32,
            "output_filters": 16,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 16,
            "output_filters": 32,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 32,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 48,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 96,
            "output_filters": 112,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 8,
            "input_filters": 112,
            "output_filters": 192,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-b2": [
        {
            "kernel_size": 3,
            "num_repeat": 1,
            "input_filters": 32,
            "output_filters": 16,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 16,
            "output_filters": 32,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 32,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 48,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 96,
            "output_filters": 112,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 8,
            "input_filters": 112,
            "output_filters": 192,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
    ],
    "efficientnetv2-b3": [
        {
            "kernel_size": 3,
            "num_repeat": 1,
            "input_filters": 32,
            "output_filters": 16,
            "expand_ratio": 1,
            "se_ratio": 0,
            "strides": 1,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 16,
            "output_filters": 32,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 2,
            "input_filters": 32,
            "output_filters": 48,
            "expand_ratio": 4,
            "se_ratio": 0,
            "strides": 2,
            "conv_type": 1,
        },
        {
            "kernel_size": 3,
            "num_repeat": 3,
            "input_filters": 48,
            "output_filters": 96,
            "expand_ratio": 4,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 5,
            "input_filters": 96,
            "output_filters": 112,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 1,
            "conv_type": 0,
        },
        {
            "kernel_size": 3,
            "num_repeat": 8,
            "input_filters": 112,
            "output_filters": 192,
            "expand_ratio": 6,
            "se_ratio": 0.25,
            "strides": 2,
            "conv_type": 0,
        },
    ],
}

CONV_KERNEL_INITIALIZER = {
    "class_name": "VarianceScaling",
    "config": {
        "scale": 2.0,
        "mode": "fan_out",
        "distribution": "truncated_normal",
    },
}

DENSE_KERNEL_INITIALIZER = {
    "class_name": "VarianceScaling",
    "config": {
        "scale": 1.0 / 3.0,
        "mode": "fan_out",
        "distribution": "uniform",
    },
}

BASE_DOCSTRING = """Instantiates the {name} architecture.

    Reference:
    - [EfficientNetV2: Smaller Models and Faster Training](
        https://arxiv.org/abs/2104.00298) (ICML 2021)

    This function returns a Keras image classification model,
    optionally loaded with weights pre-trained on ImageNet.

    For image classification use cases, see
    [this page for detailed examples](
    https://keras.io/api/applications/#usage-examples-for-image-classification-models).

    For transfer learning use cases, make sure to read the
    [guide to transfer learning & fine-tuning](
    https://keras.io/guides/transfer_learning/).

    Note: each Keras Application expects a specific kind of input
    preprocessing. For EfficientNetV2, by default input preprocessing is
    included as a part of the model (as a `Rescaling` layer), and thus
    `tf.keras.applications.efficientnet_v2.preprocess_input` is actually a
    pass-through function. In this use case, EfficientNetV2 models expect
    their inputs to be float tensors of pixels with values in the [0, 255]
    range. At the same time, preprocessing as a part of the model (i.e. the
    `Rescaling` layer) can be disabled by setting the `include_preprocessing`
    argument to False. With preprocessing disabled, EfficientNetV2 models
    expect their inputs to be float tensors of pixels with values in the
    [-1, 1] range.

    Args:
        include_top: Boolean, whether to include the fully-connected
            layer at the top of the network. Defaults to `True`.
        weights: One of `None` (random initialization),
            `"imagenet"` (pre-training on ImageNet),
            or the path to the weights file to be loaded. Defaults to
            `"imagenet"`.
        input_tensor: Optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: Optional shape tuple, only to be specified
            if `include_top` is False.
            It should have exactly 3 input channels.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`. Defaults to `None`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `"avg"` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `"max"` means that global max pooling will
                be applied.
        classes: Optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified. Defaults to 1000 (the
            number of ImageNet classes).
        classifier_activation: A string or callable. The activation function
            to use on the "top" layer. Ignored unless `include_top=True`. Set
            `classifier_activation=None` to return the logits of the "top"
            layer. Defaults to `"softmax"`.
            When loading pretrained weights, `classifier_activation` can only
            be `None` or `"softmax"`.

    Returns:
        A `keras.Model` instance.
"""
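
# A minimal usage sketch (illustrative; the random input stands in for a
# real image batch): with the default `include_preprocessing=True`, the
# non-Bx variants rescale inputs as x / 128.0 - 1, so pixel value 0 maps
# to -1.0 and 255 maps to ~0.99, matching the [-1, 1] range described
# above. Raw [0, 255] pixels can therefore be fed directly:
#
#     model = EfficientNetV2S(weights=None)
#     preds = model(tf.random.uniform((1, 384, 384, 3), 0, 255))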


def round_filters(filters, width_coefficient, min_depth, depth_divisor):
    """Round number of filters based on depth multiplier."""
    filters *= width_coefficient
    minimum_depth = min_depth or depth_divisor
    new_filters = max(
        minimum_depth,
        int(filters + depth_divisor / 2) // depth_divisor * depth_divisor,
    )
    return int(new_filters)
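
# For example, with the EfficientNetV2B2 width coefficient of 1.1 and
# depth_divisor=8: round_filters(32, 1.1, 8, 8) gives
# max(8, int(35.2 + 4.0) // 8 * 8) = 32, while round_filters(48, 1.1, 8, 8)
# gives max(8, int(52.8 + 4.0) // 8 * 8) = 56.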


def round_repeats(repeats, depth_coefficient):
    """Round number of repeats based on depth multiplier."""
    return int(math.ceil(depth_coefficient * repeats))
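
# For example, with the EfficientNetV2B3 depth coefficient of 1.4, a stage
# spec of num_repeat=5 becomes round_repeats(5, 1.4) = ceil(7.0) = 7
# repeated blocks.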


def MBConvBlock(
    input_filters: int,
    output_filters: int,
    expand_ratio=1,
    kernel_size=3,
    strides=1,
    se_ratio=0.0,
    bn_momentum=0.9,
    activation="swish",
    survival_probability: float = 0.8,
    name=None,
):
    """MBConv block: Mobile Inverted Residual Bottleneck."""
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    if name is None:
        name = backend.get_uid("block0")

    def apply(inputs):
        # Expansion phase
        filters = input_filters * expand_ratio
        if expand_ratio != 1:
            x = layers.Conv2D(
                filters=filters,
                kernel_size=1,
                strides=1,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                padding="same",
                data_format="channels_last",
                use_bias=False,
                name=name + "expand_conv",
            )(inputs)
            x = layers.BatchNormalization(
                axis=bn_axis,
                momentum=bn_momentum,
                name=name + "expand_bn",
            )(x)
            x = layers.Activation(activation, name=name + "expand_activation")(
                x
            )
        else:
            x = inputs

        # Depthwise conv
        x = layers.DepthwiseConv2D(
            kernel_size=kernel_size,
            strides=strides,
            depthwise_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            data_format="channels_last",
            use_bias=False,
            name=name + "dwconv2",
        )(x)
        x = layers.BatchNormalization(
            axis=bn_axis, momentum=bn_momentum, name=name + "bn"
        )(x)
        x = layers.Activation(activation, name=name + "activation")(x)

        # Squeeze and excite
        if 0 < se_ratio <= 1:
            filters_se = max(1, int(input_filters * se_ratio))
            se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
            if bn_axis == 1:
                se_shape = (filters, 1, 1)
            else:
                se_shape = (1, 1, filters)
            se = layers.Reshape(se_shape, name=name + "se_reshape")(se)

            se = layers.Conv2D(
                filters_se,
                1,
                padding="same",
                activation=activation,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_reduce",
            )(se)
            se = layers.Conv2D(
                filters,
                1,
                padding="same",
                activation="sigmoid",
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_expand",
            )(se)

            x = layers.multiply([x, se], name=name + "se_excite")

        # Output phase
        x = layers.Conv2D(
            filters=output_filters,
            kernel_size=1,
            strides=1,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            data_format="channels_last",
            use_bias=False,
            name=name + "project_conv",
        )(x)
        x = layers.BatchNormalization(
            axis=bn_axis, momentum=bn_momentum, name=name + "project_bn"
        )(x)

        if strides == 1 and input_filters == output_filters:
            if survival_probability:
                x = layers.Dropout(
                    survival_probability,
                    noise_shape=(None, 1, 1, 1),
                    name=name + "drop",
                )(x)
            x = layers.add([x, inputs], name=name + "add")

        return x

    return apply
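
# A minimal standalone sketch (illustrative shapes): the returned `apply`
# closure maps a 4D channels_last tensor to a 4D tensor, adding the
# residual branch only when strides == 1 and the filter counts match:
#
#     inputs = layers.Input(shape=(56, 56, 24))
#     outputs = MBConvBlock(
#         input_filters=24, output_filters=24, expand_ratio=4,
#         se_ratio=0.25, name="demo_",
#     )(inputs)  # -> (None, 56, 56, 24), with a "demo_add" skip connection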


def FusedMBConvBlock(
    input_filters: int,
    output_filters: int,
    expand_ratio=1,
    kernel_size=3,
    strides=1,
    se_ratio=0.0,
    bn_momentum=0.9,
    activation="swish",
    survival_probability: float = 0.8,
    name=None,
):
    """Fused MBConv block: fuses the expansion conv1x1 and depthwise_conv
    into a single conv2d."""
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    if name is None:
        name = backend.get_uid("block0")

    def apply(inputs):
        filters = input_filters * expand_ratio
        if expand_ratio != 1:
            x = layers.Conv2D(
                filters,
                kernel_size=kernel_size,
                strides=strides,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                data_format="channels_last",
                padding="same",
                use_bias=False,
                name=name + "expand_conv",
            )(inputs)
            x = layers.BatchNormalization(
                axis=bn_axis, momentum=bn_momentum, name=name + "expand_bn"
            )(x)
            x = layers.Activation(
                activation=activation, name=name + "expand_activation"
            )(x)
        else:
            x = inputs

        # Squeeze and excite
        if 0 < se_ratio <= 1:
            filters_se = max(1, int(input_filters * se_ratio))
            se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
            if bn_axis == 1:
                se_shape = (filters, 1, 1)
            else:
                se_shape = (1, 1, filters)

            se = layers.Reshape(se_shape, name=name + "se_reshape")(se)

            se = layers.Conv2D(
                filters_se,
                1,
                padding="same",
                activation=activation,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_reduce",
            )(se)
            se = layers.Conv2D(
                filters,
                1,
                padding="same",
                activation="sigmoid",
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_expand",
            )(se)

            x = layers.multiply([x, se], name=name + "se_excite")

        # Output phase:
        x = layers.Conv2D(
            output_filters,
            kernel_size=1 if expand_ratio != 1 else kernel_size,
            strides=1 if expand_ratio != 1 else strides,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name=name + "project_conv",
        )(x)
        x = layers.BatchNormalization(
            axis=bn_axis, momentum=bn_momentum, name=name + "project_bn"
        )(x)
        if expand_ratio == 1:
            x = layers.Activation(
                activation=activation, name=name + "project_activation"
            )(x)

        # Residual:
        if strides == 1 and input_filters == output_filters:
            if survival_probability:
                x = layers.Dropout(
                    survival_probability,
                    noise_shape=(None, 1, 1, 1),
                    name=name + "drop",
                )(x)
            x = layers.add([x, inputs], name=name + "add")
        return x

    return apply
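
# Note: as wired up in EfficientNetV2 below, `conv_type` in the block args
# selects the builder via {0: MBConvBlock, 1: FusedMBConvBlock}, so every
# configuration in DEFAULT_BLOCKS_ARGS uses the fused variant in its early,
# high-resolution stages and the depthwise variant in the later stages.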


def EfficientNetV2(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    min_depth=8,
    bn_momentum=0.9,
    activation="swish",
    blocks_args="default",
    model_name="efficientnetv2",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    """Instantiates the EfficientNetV2 architecture using given scaling
    coefficients.

    Args:
        width_coefficient: float, scaling coefficient for network width.
        depth_coefficient: float, scaling coefficient for network depth.
        default_size: integer, default input image size.
        dropout_rate: float, dropout rate before the final classifier layer.
        drop_connect_rate: float, dropout rate at skip connections.
        depth_divisor: integer, a unit of network width.
        min_depth: integer, minimum number of filters.
        bn_momentum: float, momentum parameter for Batch Normalization
            layers.
        activation: activation function.
        blocks_args: list of dicts, parameters to construct block modules.
        model_name: string, model name.
        include_top: whether to include the fully-connected layer at the top
            of the network.
        weights: one of `None` (random initialization), `"imagenet"`
            (pre-training on ImageNet), or the path to the weights file to
            be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            or numpy array to use as image input for the model.
        input_shape: optional shape tuple, only to be specified if
            `include_top` is False. It should have exactly 3 input channels.
        pooling: optional pooling mode for feature extraction when
            `include_top` is `False`.
            - `None` means that the output of the model will be the 4D
                tensor output of the last convolutional layer.
            - `"avg"` means that global average pooling will be applied to
                the output of the last convolutional layer, and thus the
                output of the model will be a 2D tensor.
            - `"max"` means that global max pooling will be applied.
        classes: optional number of classes to classify images into, only to
            be specified if `include_top` is True, and if no `weights`
            argument is specified.
        classifier_activation: A string or callable. The activation function
            to use on the "top" layer. Ignored unless `include_top=True`.
            Set `classifier_activation=None` to return the logits of the
            "top" layer.
        include_preprocessing: Boolean, whether to include the preprocessing
            layer (`Rescaling`) at the bottom of the network. Defaults to
            `True`.

    Returns:
        A `keras.Model` instance.

    Raises:
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        ValueError: if `classifier_activation` is not `"softmax"` or `None`
            when using a pretrained top layer.
    """

    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS[model_name]

    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded. "
            f"Received: weights={weights}"
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights` as `'imagenet'` with `include_top`"
            " as true, `classes` should be 1000. "
            f"Received: classes={classes}"
        )

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        # Apply original V1 preprocessing for Bx variants
        # if number of channels allows it
        num_channels = input_shape[bn_axis - 1]
        if model_name.split("-")[-1].startswith("b") and num_channels == 3:
            x = layers.Rescaling(scale=1.0 / 255)(x)
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)
        else:
            x = layers.Rescaling(scale=1.0 / 128.0, offset=-1)(x)

    # Build stem
    stem_filters = round_filters(
        filters=blocks_args[0]["input_filters"],
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=stem_filters,
        kernel_size=3,
        strides=2,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        use_bias=False,
        name="stem_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="stem_bn",
    )(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Build blocks
    blocks_args = copy.deepcopy(blocks_args)
    b = 0
    blocks = float(sum(args["num_repeat"] for args in blocks_args))

    for i, args in enumerate(blocks_args):
        assert args["num_repeat"] > 0

        # Update block input and output filters based on depth multiplier.
        args["input_filters"] = round_filters(
            filters=args["input_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor,
        )
        args["output_filters"] = round_filters(
            filters=args["output_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor,
        )

        # Determine which conv type to use:
        block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")]
        repeats = round_repeats(
            repeats=args.pop("num_repeat"), depth_coefficient=depth_coefficient
        )
        for j in range(repeats):
            # The first block needs to take care of stride and filter size
            # increase.
            if j > 0:
                args["strides"] = 1
                args["input_filters"] = args["output_filters"]

            x = block(
                activation=activation,
                bn_momentum=bn_momentum,
                survival_probability=drop_connect_rate * b / blocks,
                name=f"block{i + 1}{chr(j + 97)}_",
                **args,
            )(x)
            b += 1

    # Build top
    top_filters = round_filters(
        filters=1280,
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=top_filters,
        kernel_size=1,
        strides=1,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        data_format="channels_last",
        use_bias=False,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="top_bn",
    )(x)
    x = layers.Activation(activation=activation, name="top_activation")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(
            classes,
            activation=classifier_activation,
            kernel_initializer=DENSE_KERNEL_INITIALIZER,
            bias_initializer=tf.constant_initializer(0),
            name="predictions",
        )(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights.
    if weights == "imagenet":
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suffix
        weights_path = data_utils.get_file(
            file_name,
            BASE_WEIGHTS_PATH + file_name,
            cache_subdir="models",
            file_hash=file_hash,
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
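
# EfficientNetV2 can also be called directly to build a custom scaling. A
# sketch with made-up coefficients (a hypothetical wider B0; no pretrained
# weights exist for it, so `weights` must be None or a local path):
#
#     model = EfficientNetV2(
#         width_coefficient=1.5,
#         depth_coefficient=1.0,
#         default_size=224,
#         model_name="efficientnetv2-b0",
#         weights=None,
#     )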


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2B0",
    "keras.applications.EfficientNetV2B0",
)
def EfficientNetV2B0(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=224,
        model_name="efficientnetv2-b0",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2B1",
    "keras.applications.EfficientNetV2B1",
)
def EfficientNetV2B1(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.1,
        default_size=240,
        model_name="efficientnetv2-b1",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2B2",
    "keras.applications.EfficientNetV2B2",
)
def EfficientNetV2B2(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.1,
        depth_coefficient=1.2,
        default_size=260,
        model_name="efficientnetv2-b2",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2B3",
    "keras.applications.EfficientNetV2B3",
)
def EfficientNetV2B3(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.2,
        depth_coefficient=1.4,
        default_size=300,
        model_name="efficientnetv2-b3",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2S",
    "keras.applications.EfficientNetV2S",
)
def EfficientNetV2S(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=384,
        model_name="efficientnetv2-s",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2M",
    "keras.applications.EfficientNetV2M",
)
def EfficientNetV2M(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=480,
        model_name="efficientnetv2-m",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    "keras.applications.efficientnet_v2.EfficientNetV2L",
    "keras.applications.EfficientNetV2L",
)
def EfficientNetV2L(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=480,
        model_name="efficientnetv2-l",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


EfficientNetV2B0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B0")
EfficientNetV2B1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B1")
EfficientNetV2B2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B2")
EfficientNetV2B3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B3")
EfficientNetV2S.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2S")
EfficientNetV2M.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2M")
EfficientNetV2L.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2L")


@keras_export("keras.applications.efficientnet_v2.preprocess_input")
def preprocess_input(x, data_format=None):
    """A placeholder method for backward compatibility.

    The preprocessing logic has been included in the EfficientNetV2 model
    implementation. Users are no longer required to call this method to
    normalize the input data. This method does nothing and is only kept as a
    placeholder to align the API surface between the old and new versions of
    the model.

    Args:
        x: A floating point `numpy.array` or a `tf.Tensor`.
        data_format: Optional data format of the image tensor/array. Defaults
            to None, in which case the global setting
            `tf.keras.backend.image_data_format()` is used (unless you
            changed it, it defaults to "channels_last").

    Returns:
        Unchanged `numpy.array` or `tf.Tensor`.
    """
    return x
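
# End-to-end sketch (assuming a hypothetical `image` array of raw
# [0, 255] RGB pixels already resized to 224x224): no manual
# preprocessing call is needed, since rescaling is built into the model.
#
#     model = EfficientNetV2B0()
#     x = tf.expand_dims(tf.convert_to_tensor(image, dtype=tf.float32), 0)
#     preds = model.predict(x)
#     print(decode_predictions(preds, top=3))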


@keras_export("keras.applications.efficientnet_v2.decode_predictions")
def decode_predictions(preds, top=5):
    return imagenet_utils.decode_predictions(preds, top=top)


decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__