Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/applications/imagenet_utils.py: 16%

170 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Utilities for ImageNet data preprocessing & prediction decoding.""" 

16 

17import json 

18import warnings 

19 

20import numpy as np 

21 

22from keras.src import activations 

23from keras.src import backend 

24from keras.src.utils import data_utils 

25 

26# isort: off 

27from tensorflow.python.util.tf_export import keras_export 

28 

29CLASS_INDEX = None 

30CLASS_INDEX_PATH = ( 

31 "https://storage.googleapis.com/download.tensorflow.org/" 

32 "data/imagenet_class_index.json" 

33) 

34 

35 

36PREPROCESS_INPUT_DOC = """ 

37 Preprocesses a tensor or Numpy array encoding a batch of images. 

38 

39 Usage example with `applications.MobileNet`: 

40 

41 ```python 

42 i = tf.keras.layers.Input([None, None, 3], dtype = tf.uint8) 

43 x = tf.cast(i, tf.float32) 

44 x = tf.keras.applications.mobilenet.preprocess_input(x) 

45 core = tf.keras.applications.MobileNet() 

46 x = core(x) 

47 model = tf.keras.Model(inputs=[i], outputs=[x]) 

48 

49 image = tf.image.decode_png(tf.io.read_file('file.png')) 

50 result = model(image) 

51 ``` 

52 

53 Args: 

54 x: A floating point `numpy.array` or a `tf.Tensor`, 3D or 4D with 3 color 

55 channels, with values in the range [0, 255]. 

56 The preprocessed data are written over the input data 

57 if the data types are compatible. To avoid this 

58 behaviour, `numpy.copy(x)` can be used. 

59 data_format: Optional data format of the image tensor/array. None, means 

60 the global setting `tf.keras.backend.image_data_format()` is used 

61 (unless you changed it, it uses "channels_last").{mode} 

62 Defaults to `None`. 

63 

64 Returns: 

65 Preprocessed `numpy.array` or a `tf.Tensor` with type `float32`. 

66 {ret} 

67 

68 Raises: 

69 {error} 

70 """ 

71 

72PREPROCESS_INPUT_MODE_DOC = """ 

73 mode: One of "caffe", "tf" or "torch". 

74 - caffe: will convert the images from RGB to BGR, 

75 then will zero-center each color channel with 

76 respect to the ImageNet dataset, 

77 without scaling. 

78 - tf: will scale pixels between -1 and 1, 

79 sample-wise. 

80 - torch: will scale pixels between 0 and 1 and then 

81 will normalize each channel with respect to the 

82 ImageNet dataset. 

83 Defaults to "caffe". 

84 """ 

85 

86PREPROCESS_INPUT_DEFAULT_ERROR_DOC = """ 

87 ValueError: In case of unknown `mode` or `data_format` argument.""" 

88 

89PREPROCESS_INPUT_ERROR_DOC = """ 

90 ValueError: In case of unknown `data_format` argument.""" 

91 

92PREPROCESS_INPUT_RET_DOC_TF = """ 

93 The inputs pixel values are scaled between -1 and 1, sample-wise.""" 

94 

95PREPROCESS_INPUT_RET_DOC_TORCH = """ 

96 The input pixels values are scaled between 0 and 1 and each channel is 

97 normalized with respect to the ImageNet dataset.""" 

98 

99PREPROCESS_INPUT_RET_DOC_CAFFE = """ 

100 The images are converted from RGB to BGR, then each color channel is 

101 zero-centered with respect to the ImageNet dataset, without scaling.""" 

102 

103 

@keras_export("keras.applications.imagenet_utils.preprocess_input")
def preprocess_input(x, data_format=None, mode="caffe"):
    """Preprocesses a tensor or Numpy array encoding a batch of images."""
    # `mode` is checked before `data_format`, so an invalid mode is the
    # error reported when both arguments are wrong.
    supported_modes = {"caffe", "tf", "torch"}
    if mode not in supported_modes:
        raise ValueError(
            "Expected mode to be one of `caffe`, `tf` or `torch`. "
            f"Received: mode={mode}"
        )

    # `None` defers to the backend-wide image data format.
    if data_format is None:
        data_format = backend.image_data_format()
    elif data_format not in {"channels_first", "channels_last"}:
        raise ValueError(
            "Expected data_format to be one of `channels_first` or "
            f"`channels_last`. Received: data_format={data_format}"
        )

    # Numpy arrays take the Numpy implementation; anything else is handed
    # to the backend (symbolic) implementation.
    if isinstance(x, np.ndarray):
        handler = _preprocess_numpy_input
    else:
        handler = _preprocess_symbolic_input
    return handler(x, data_format=data_format, mode=mode)


# The one-line docstring above is only a placeholder: the published
# docstring is rendered from the shared template defined in this module.
preprocess_input.__doc__ = PREPROCESS_INPUT_DOC.format(
    mode=PREPROCESS_INPUT_MODE_DOC,
    ret="",
    error=PREPROCESS_INPUT_DEFAULT_ERROR_DOC,
)

132 

133 

@keras_export("keras.applications.imagenet_utils.decode_predictions")
def decode_predictions(preds, top=5):
    """Decodes the prediction of an ImageNet model.

    Args:
        preds: Numpy array encoding a batch of predictions, of shape
            `(samples, 1000)`.
        top: Integer, how many top-guesses to return. `0` yields an empty
            list for every sample. Defaults to 5.

    Returns:
        A list of lists of top class prediction tuples
        `(class_name, class_description, score)`.
        One list of tuples per sample in batch input, ordered by
        decreasing score.

    Raises:
        ValueError: In case of invalid shape of the `preds` array
            (must be 2D with 1000 entries on the second axis).
    """
    global CLASS_INDEX

    if len(preds.shape) != 2 or preds.shape[1] != 1000:
        raise ValueError(
            "`decode_predictions` expects "
            "a batch of predictions "
            "(i.e. a 2D array of shape (samples, 1000)). "
            "Found array with shape: " + str(preds.shape)
        )
    # The class index is fetched once and cached in the module-level
    # `CLASS_INDEX` for every later call.
    if CLASS_INDEX is None:
        fpath = data_utils.get_file(
            "imagenet_class_index.json",
            CLASS_INDEX_PATH,
            cache_subdir="models",
            file_hash="c2c37ea517e94d9795004a39431a14cb",
        )
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(fpath, encoding="utf-8") as f:
            CLASS_INDEX = json.load(f)
    results = []
    for pred in preds:
        # Reverse first, then truncate. The former `[-top:][::-1]` selected
        # *every* class when `top == 0` because `[-0:]` is the full slice;
        # for any non-zero `top` both forms pick the same indices.
        top_indices = pred.argsort()[::-1][:top]
        result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
        result.sort(key=lambda x: x[2], reverse=True)
        results.append(result)
    return results

176 

177 

178def _preprocess_numpy_input(x, data_format, mode): 

179 """Preprocesses a Numpy array encoding a batch of images. 

180 

181 Args: 

182 x: Input array, 3D or 4D. 

183 data_format: Data format of the image array. 

184 mode: One of "caffe", "tf" or "torch". 

185 - caffe: will convert the images from RGB to BGR, 

186 then will zero-center each color channel with 

187 respect to the ImageNet dataset, 

188 without scaling. 

189 - tf: will scale pixels between -1 and 1, 

190 sample-wise. 

191 - torch: will scale pixels between 0 and 1 and then 

192 will normalize each channel with respect to the 

193 ImageNet dataset. 

194 

195 Returns: 

196 Preprocessed Numpy array. 

197 """ 

198 if not issubclass(x.dtype.type, np.floating): 

199 x = x.astype(backend.floatx(), copy=False) 

200 

201 if mode == "tf": 

202 x /= 127.5 

203 x -= 1.0 

204 return x 

205 elif mode == "torch": 

206 x /= 255.0 

207 mean = [0.485, 0.456, 0.406] 

208 std = [0.229, 0.224, 0.225] 

209 else: 

210 if data_format == "channels_first": 

211 # 'RGB'->'BGR' 

212 if x.ndim == 3: 

213 x = x[::-1, ...] 

214 else: 

215 x = x[:, ::-1, ...] 

216 else: 

217 # 'RGB'->'BGR' 

218 x = x[..., ::-1] 

219 mean = [103.939, 116.779, 123.68] 

220 std = None 

221 

222 # Zero-center by mean pixel 

223 if data_format == "channels_first": 

224 if x.ndim == 3: 

225 x[0, :, :] -= mean[0] 

226 x[1, :, :] -= mean[1] 

227 x[2, :, :] -= mean[2] 

228 if std is not None: 

229 x[0, :, :] /= std[0] 

230 x[1, :, :] /= std[1] 

231 x[2, :, :] /= std[2] 

232 else: 

233 x[:, 0, :, :] -= mean[0] 

234 x[:, 1, :, :] -= mean[1] 

235 x[:, 2, :, :] -= mean[2] 

236 if std is not None: 

237 x[:, 0, :, :] /= std[0] 

238 x[:, 1, :, :] /= std[1] 

239 x[:, 2, :, :] /= std[2] 

240 else: 

241 x[..., 0] -= mean[0] 

242 x[..., 1] -= mean[1] 

243 x[..., 2] -= mean[2] 

244 if std is not None: 

245 x[..., 0] /= std[0] 

246 x[..., 1] /= std[1] 

247 x[..., 2] /= std[2] 

248 return x 

249 

250 

def _preprocess_symbolic_input(x, data_format, mode):
    """Applies ImageNet preprocessing to a backend tensor of images.

    Args:
        x: Input tensor, 3D or 4D.
        data_format: Data format of the image tensor.
        mode: One of "caffe", "tf" or "torch".
            - caffe: will convert the images from RGB to BGR,
                then will zero-center each color channel with
                respect to the ImageNet dataset,
                without scaling.
            - tf: will scale pixels between -1 and 1,
                sample-wise.
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.

    Returns:
        Preprocessed tensor.
    """
    # "tf" mode is a plain rescale and needs no per-channel statistics.
    if mode == "tf":
        x /= 127.5
        x -= 1.0
        return x

    channels_first = data_format == "channels_first"

    if mode == "torch":
        x /= 255.0
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        # 'RGB'->'BGR': flip the channel axis, wherever it lives.
        if not channels_first:
            x = x[..., ::-1]
        elif backend.ndim(x) == 3:
            x = x[::-1, ...]
        else:
            x = x[:, ::-1, ...]
        mean = [103.939, 116.779, 123.68]
        std = None

    # Zero-center by mean pixel: the negated mean is added as a bias, cast
    # to the input dtype only when the two dtypes differ.
    offset = backend.constant(-np.array(mean))
    if backend.dtype(x) != backend.dtype(offset):
        offset = backend.cast(offset, backend.dtype(x))
    x = backend.bias_add(x, offset, data_format=data_format)

    if std is not None:
        scale = backend.constant(np.array(std), dtype=backend.dtype(x))
        if channels_first:
            # Reshape so the per-channel factors line up with the channel
            # axis instead of the trailing axis.
            scale = backend.reshape(scale, (-1, 1, 1))
        x /= scale
    return x

309 

310 

def obtain_input_shape(
    input_shape,
    default_size,
    min_size,
    data_format,
    require_flatten,
    weights=None,
):
    """Internal utility to compute/validate a model's input shape.

    Args:
        input_shape: Either None (will return the default network input
            shape), or a user-provided shape to be validated.
        default_size: Default input width/height for the model.
        min_size: Minimum input width/height accepted by the model.
        data_format: Image data format to use. Anything other than
            `"channels_first"` is handled as channels-last.
        require_flatten: Whether the model is expected to
            be linked to a classifier via a Flatten layer.
        weights: One of `None` (random initialization)
            or 'imagenet' (pre-training on ImageNet).
            If weights='imagenet' input channels must be equal to 3.

    Returns:
        An integer shape tuple (may include None entries).

    Raises:
        ValueError: In case of invalid argument values.
    """
    channels_first = data_format == "channels_first"

    # Without ImageNet weights the channel count of a user-provided 3D shape
    # is kept (with a warning when unusual); otherwise 3 channels are assumed.
    if weights != "imagenet" and input_shape and len(input_shape) == 3:
        channels = input_shape[0] if channels_first else input_shape[-1]
        if channels not in {1, 3}:
            warnings.warn(
                "This model usually expects 1 or 3 input channels. "
                "However, it was passed an input_shape with "
                + str(channels)
                + " input channels.",
                stacklevel=2,
            )
    else:
        channels = 3
    if channels_first:
        default_shape = (channels, default_size, default_size)
    else:
        default_shape = (default_size, default_size, channels)

    # Pretrained classifier head: only the default shape is acceptable.
    if weights == "imagenet" and require_flatten:
        if input_shape is not None and input_shape != default_shape:
            raise ValueError(
                "When setting `include_top=True` "
                "and loading `imagenet` weights, "
                f"`input_shape` should be {default_shape}. "
                f"Received: input_shape={input_shape}"
            )
        return default_shape

    if input_shape:
        if len(input_shape) != 3:
            raise ValueError(
                "`input_shape` must be a tuple of three integers."
            )
        if channels_first:
            channels = input_shape[0]
            spatial = (input_shape[1], input_shape[2])
        else:
            channels = input_shape[-1]
            spatial = (input_shape[0], input_shape[1])
        if channels != 3 and weights == "imagenet":
            raise ValueError(
                "The input must have 3 channels; Received "
                f"`input_shape={input_shape}`"
            )
        # Unknown (None) spatial dimensions are exempt from the size check.
        if any(dim is not None and dim < min_size for dim in spatial):
            raise ValueError(
                f"Input size must be at least {min_size}x{min_size}; "
                f"Received: input_shape={input_shape}"
            )
    elif require_flatten:
        input_shape = default_shape
    elif channels_first:
        input_shape = (3, None, None)
    else:
        input_shape = (None, None, 3)

    if require_flatten and None in input_shape:
        raise ValueError(
            "If `include_top` is True, "
            "you should specify a static `input_shape`. "
            f"Received: input_shape={input_shape}"
        )
    return input_shape

430 

431 

def correct_pad(inputs, kernel_size):
    """Returns a tuple for zero-padding for 2D convolution with downsampling.

    Args:
        inputs: Input tensor.
        kernel_size: An integer or tuple/list of 2 integers.

    Returns:
        A tuple of two 2-tuples, one per spatial dimension, in the order
        the spatial dimensions appear in the shape of `inputs`.
    """
    # Index of the first spatial axis in the static shape of `inputs`.
    img_dim = 2 if backend.image_data_format() == "channels_first" else 1
    input_size = backend.int_shape(inputs)[img_dim : (img_dim + 2)]
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    # When either spatial size is unknown, use the (1, 1) fallback.
    # Previously only the first dimension was checked, so a known first
    # dimension with an unknown second one failed on `None % 2`.
    if input_size[0] is None or input_size[1] is None:
        adjust = (1, 1)
    else:
        adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
    return (
        (correct[0] - adjust[0], correct[0]),
        (correct[1] - adjust[1], correct[1]),
    )

455 

456 

def validate_activation(classifier_activation, weights):
    """Validates that `classifier_activation` is compatible with the weights.

    Nothing is checked when `weights` is `None`.

    Args:
        classifier_activation: str or callable activation function.
        weights: The pretrained weights to load.

    Raises:
        ValueError: if an activation other than `None` or `softmax` is used
            with pretrained weights.
    """
    if weights is not None:
        # Resolve the argument to an activation object first so that names
        # and callables are compared on equal terms.
        classifier_activation = activations.get(classifier_activation)
        permitted = {
            activations.get("softmax"),
            activations.get(None),
        }
        if classifier_activation not in permitted:
            raise ValueError(
                "Only `None` and `softmax` activations are allowed "
                "for the `classifier_activation` argument when using "
                "pretrained weights, with `include_top=True`; Received: "
                f"classifier_activation={classifier_activation}"
            )

482