Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/applications/imagenet_utils.py: 16%
170 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Utilities for ImageNet data preprocessing & prediction decoding."""
17import json
18import warnings
20import numpy as np
22from keras.src import activations
23from keras.src import backend
24from keras.src.utils import data_utils
26# isort: off
27from tensorflow.python.util.tf_export import keras_export
# In-memory cache of the ImageNet class index; stays `None` until
# `decode_predictions` loads the JSON file on its first call.
CLASS_INDEX = None

# Remote location of the JSON file holding the ImageNet class index.
CLASS_INDEX_PATH = (
    "https://storage.googleapis.com/"
    "download.tensorflow.org/data/"
    "imagenet_class_index.json"
)
# Docstring template for `preprocess_input`. The `{mode}`, `{ret}` and
# `{error}` placeholders are filled in with `str.format` when the docstring
# is attached to the function.
36PREPROCESS_INPUT_DOC = """
37 Preprocesses a tensor or Numpy array encoding a batch of images.
39 Usage example with `applications.MobileNet`:
41 ```python
42 i = tf.keras.layers.Input([None, None, 3], dtype = tf.uint8)
43 x = tf.cast(i, tf.float32)
44 x = tf.keras.applications.mobilenet.preprocess_input(x)
45 core = tf.keras.applications.MobileNet()
46 x = core(x)
47 model = tf.keras.Model(inputs=[i], outputs=[x])
49 image = tf.image.decode_png(tf.io.read_file('file.png'))
50 result = model(image)
51 ```
53 Args:
54 x: A floating point `numpy.array` or a `tf.Tensor`, 3D or 4D with 3 color
55 channels, with values in the range [0, 255].
56 The preprocessed data are written over the input data
57 if the data types are compatible. To avoid this
58 behaviour, `numpy.copy(x)` can be used.
59 data_format: Optional data format of the image tensor/array. None, means
60 the global setting `tf.keras.backend.image_data_format()` is used
61 (unless you changed it, it uses "channels_last").{mode}
62 Defaults to `None`.
64 Returns:
65 Preprocessed `numpy.array` or a `tf.Tensor` with type `float32`.
66 {ret}
68 Raises:
69 {error}
70 """
# Description of the `mode` argument; substituted for `{mode}` in
# PREPROCESS_INPUT_DOC when building the `preprocess_input` docstring.
72PREPROCESS_INPUT_MODE_DOC = """
73 mode: One of "caffe", "tf" or "torch".
74 - caffe: will convert the images from RGB to BGR,
75 then will zero-center each color channel with
76 respect to the ImageNet dataset,
77 without scaling.
78 - tf: will scale pixels between -1 and 1,
79 sample-wise.
80 - torch: will scale pixels between 0 and 1 and then
81 will normalize each channel with respect to the
82 ImageNet dataset.
83 Defaults to "caffe".
84 """
# `Raises:` text covering both `mode` and `data_format`; substituted for
# `{error}` in the `preprocess_input` docstring.
86PREPROCESS_INPUT_DEFAULT_ERROR_DOC = """
87 ValueError: In case of unknown `mode` or `data_format` argument."""
# `Raises:` text covering `data_format` only. Not referenced in the visible
# part of this file; presumably used by wrappers that do not expose `mode`
# (TODO confirm against callers).
89PREPROCESS_INPUT_ERROR_DOC = """
90 ValueError: In case of unknown `data_format` argument."""
# The three constants below describe the return value for each preprocessing
# mode. They are not referenced in the visible part of this file; presumably
# they are substituted for `{ret}` by model-specific wrappers (TODO confirm).
92PREPROCESS_INPUT_RET_DOC_TF = """
93 The inputs pixel values are scaled between -1 and 1, sample-wise."""
95PREPROCESS_INPUT_RET_DOC_TORCH = """
96 The input pixels values are scaled between 0 and 1 and each channel is
97 normalized with respect to the ImageNet dataset."""
99PREPROCESS_INPUT_RET_DOC_CAFFE = """
100 The images are converted from RGB to BGR, then each color channel is
101 zero-centered with respect to the ImageNet dataset, without scaling."""
@keras_export("keras.applications.imagenet_utils.preprocess_input")
def preprocess_input(x, data_format=None, mode="caffe"):
    """Preprocesses a tensor or Numpy array encoding a batch of images."""
    # NOTE: the one-line docstring above is replaced right after the
    # definition by the formatted PREPROCESS_INPUT_DOC template.
    supported_modes = {"caffe", "tf", "torch"}
    if mode not in supported_modes:
        raise ValueError(
            "Expected mode to be one of `caffe`, `tf` or `torch`. "
            f"Received: mode={mode}"
        )

    supported_formats = {"channels_first", "channels_last"}
    if data_format is None:
        # Fall back to the globally configured image data format.
        data_format = backend.image_data_format()
    elif data_format not in supported_formats:
        raise ValueError(
            "Expected data_format to be one of `channels_first` or "
            f"`channels_last`. Received: data_format={data_format}"
        )

    # Numpy arrays are processed with Numpy; everything else is handed to
    # the backend-based implementation.
    handler = (
        _preprocess_numpy_input
        if isinstance(x, np.ndarray)
        else _preprocess_symbolic_input
    )
    return handler(x, data_format=data_format, mode=mode)


# Attach the full user-facing documentation built from the shared template.
preprocess_input.__doc__ = PREPROCESS_INPUT_DOC.format(
    error=PREPROCESS_INPUT_DEFAULT_ERROR_DOC,
    mode=PREPROCESS_INPUT_MODE_DOC,
    ret="",
)
@keras_export("keras.applications.imagenet_utils.decode_predictions")
def decode_predictions(preds, top=5):
    """Decodes the prediction of an ImageNet model.

    Args:
      preds: Numpy array encoding a batch of predictions, with shape
        `(samples, 1000)`.
      top: Integer, how many top-guesses to return. `top=0` yields an empty
        list for every sample. Defaults to 5.

    Returns:
      A list of lists of top class prediction tuples
      `(class_name, class_description, score)`, ordered by decreasing score.
      One list of tuples per sample in batch input.

    Raises:
      ValueError: In case of invalid shape of the `preds` array
        (must be 2D with 1000 entries along the second axis).
    """
    global CLASS_INDEX

    if len(preds.shape) != 2 or preds.shape[1] != 1000:
        raise ValueError(
            "`decode_predictions` expects "
            "a batch of predictions "
            "(i.e. a 2D array of shape (samples, 1000)). "
            f"Found array with shape: {preds.shape}"
        )

    # The class index is fetched and parsed once, then cached in the
    # module-level `CLASS_INDEX` for subsequent calls.
    if CLASS_INDEX is None:
        fpath = data_utils.get_file(
            "imagenet_class_index.json",
            CLASS_INDEX_PATH,
            cache_subdir="models",
            file_hash="c2c37ea517e94d9795004a39431a14cb",
        )
        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's default locale encoding.
        with open(fpath, encoding="utf-8") as f:
            CLASS_INDEX = json.load(f)

    results = []
    for pred in preds:
        # Reverse first, then truncate: `argsort()[-top:]` would return ALL
        # indices for `top=0` because `-0 == 0`. For any non-zero `top` this
        # selects exactly the same indices in the same order.
        top_indices = pred.argsort()[::-1][:top]
        result = [
            tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices
        ]
        result.sort(key=lambda x: x[2], reverse=True)
        results.append(result)
    return results
def _preprocess_numpy_input(x, data_format, mode):
    """Preprocesses a Numpy array encoding a batch of images.

    Floating point inputs are modified in place; other dtypes are first
    converted to the backend float type.

    Args:
      x: Input array, 3D or 4D.
      data_format: Data format of the image array.
      mode: One of "caffe", "tf" or "torch".
        - caffe: will convert the images from RGB to BGR,
            then will zero-center each color channel with
            respect to the ImageNet dataset,
            without scaling.
        - tf: will scale pixels between -1 and 1,
            sample-wise.
        - torch: will scale pixels between 0 and 1 and then
            will normalize each channel with respect to the
            ImageNet dataset.

    Returns:
      Preprocessed Numpy array.
    """
    if not issubclass(x.dtype.type, np.floating):
        x = x.astype(backend.floatx(), copy=False)

    if mode == "tf":
        x /= 127.5
        x -= 1.0
        return x

    channels_first = data_format == "channels_first"

    if mode == "torch":
        x /= 255.0
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        # 'RGB'->'BGR': reverse the channel axis (yields a reversed view).
        if not channels_first:
            x = x[..., ::-1]
        elif x.ndim == 3:
            x = x[::-1, ...]
        else:
            x = x[:, ::-1, ...]
        mean = [103.939, 116.779, 123.68]
        std = None

    # One index expression per color channel, matching the array layout.
    every = slice(None)
    if not channels_first:
        channel_slots = [(Ellipsis, c) for c in range(3)]
    elif x.ndim == 3:
        channel_slots = [(c, every, every) for c in range(3)]
    else:
        channel_slots = [(every, c, every, every) for c in range(3)]

    # Zero-center by mean pixel
    for slot, offset in zip(channel_slots, mean):
        x[slot] -= offset
    # Scale by the per-channel standard deviation when the mode defines one.
    if std is not None:
        for slot, scale in zip(channel_slots, std):
            x[slot] /= scale
    return x
def _preprocess_symbolic_input(x, data_format, mode):
    """Preprocesses a tensor encoding a batch of images.

    Args:
      x: Input tensor, 3D or 4D.
      data_format: Data format of the image tensor.
      mode: One of "caffe", "tf" or "torch".
        - caffe: will convert the images from RGB to BGR,
            then will zero-center each color channel with
            respect to the ImageNet dataset,
            without scaling.
        - tf: will scale pixels between -1 and 1,
            sample-wise.
        - torch: will scale pixels between 0 and 1 and then
            will normalize each channel with respect to the
            ImageNet dataset.

    Returns:
      Preprocessed tensor.
    """
    if mode == "tf":
        x /= 127.5
        x -= 1.0
        return x

    channels_first = data_format == "channels_first"

    if mode == "torch":
        x /= 255.0
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        # 'RGB'->'BGR': reverse the channel axis.
        if not channels_first:
            x = x[..., ::-1]
        elif backend.ndim(x) == 3:
            x = x[::-1, ...]
        else:
            x = x[:, ::-1, ...]
        mean = [103.939, 116.779, 123.68]
        std = None

    # The mean is negated so that adding it as a bias subtracts it.
    mean_tensor = backend.constant(-np.array(mean))

    # Zero-center by mean pixel, casting the bias to the input dtype first
    # when the two dtypes differ.
    input_dtype = backend.dtype(x)
    if backend.dtype(mean_tensor) != input_dtype:
        mean_tensor = backend.cast(mean_tensor, input_dtype)
    x = backend.bias_add(x, mean_tensor, data_format=data_format)

    if std is None:
        return x

    std_tensor = backend.constant(np.array(std), dtype=backend.dtype(x))
    if channels_first:
        # Reshape to (channels, 1, 1) so the division broadcasts over the
        # two trailing axes.
        std_tensor = backend.reshape(std_tensor, (-1, 1, 1))
    x /= std_tensor
    return x
def obtain_input_shape(
    input_shape,
    default_size,
    min_size,
    data_format,
    require_flatten,
    weights=None,
):
    """Internal utility to compute/validate a model's input shape.

    Args:
      input_shape: Either None (will return the default network input shape),
        or a user-provided shape to be validated.
      default_size: Default input width/height for the model.
      min_size: Minimum input width/height accepted by the model.
      data_format: Image data format to use.
      require_flatten: Whether the model is expected to
        be linked to a classifier via a Flatten layer.
      weights: One of `None` (random initialization)
        or 'imagenet' (pre-training on ImageNet).
        If weights='imagenet' input channels must be equal to 3.

    Returns:
      An integer shape tuple (may include None entries). A user-provided
      `input_shape` that passes validation is returned unchanged.

    Raises:
      ValueError: In case of invalid argument values.
    """
    channels_first = data_format == "channels_first"
    # Position of the channel axis and of the two spatial axes in a 3D shape.
    channel_axis = 0 if channels_first else -1
    spatial_axes = (1, 2) if channels_first else (0, 1)

    # Without ImageNet weights the channel count of a 3D user shape is
    # honoured in the default shape; otherwise 3 channels are assumed.
    if weights != "imagenet" and input_shape and len(input_shape) == 3:
        channels = input_shape[channel_axis]
        if channels not in {1, 3}:
            warnings.warn(
                "This model usually expects 1 or 3 input channels. "
                "However, it was passed an input_shape with "
                + str(channels)
                + " input channels.",
                stacklevel=2,
            )
    else:
        channels = 3

    if channels_first:
        default_shape = (channels, default_size, default_size)
    else:
        default_shape = (default_size, default_size, channels)

    if weights == "imagenet" and require_flatten:
        if input_shape is not None:
            # Compare lists by value: `[224, 224, 3] != (224, 224, 3)` is
            # True in Python, which would reject an otherwise valid shape.
            provided = (
                tuple(input_shape)
                if isinstance(input_shape, list)
                else input_shape
            )
            if provided != default_shape:
                raise ValueError(
                    "When setting `include_top=True` "
                    "and loading `imagenet` weights, "
                    f"`input_shape` should be {default_shape}. "
                    f"Received: input_shape={input_shape}"
                )
        return default_shape

    if input_shape:
        if len(input_shape) != 3:
            raise ValueError(
                "`input_shape` must be a tuple of three integers."
            )
        if input_shape[channel_axis] != 3 and weights == "imagenet":
            raise ValueError(
                "The input must have 3 channels; Received "
                f"`input_shape={input_shape}`"
            )
        # Unknown (None) spatial dimensions are not checked against min_size.
        if any(
            input_shape[axis] is not None and input_shape[axis] < min_size
            for axis in spatial_axes
        ):
            raise ValueError(
                "Input size must be at least "
                f"{min_size}x{min_size}; Received: "
                f"input_shape={input_shape}"
            )
    elif require_flatten:
        input_shape = default_shape
    elif channels_first:
        input_shape = (3, None, None)
    else:
        input_shape = (None, None, 3)

    if require_flatten and None in input_shape:
        raise ValueError(
            "If `include_top` is True, "
            "you should specify a static `input_shape`. "
            f"Received: input_shape={input_shape}"
        )
    return input_shape
def correct_pad(inputs, kernel_size):
    """Returns a tuple for zero-padding for 2D convolution with downsampling.

    Args:
      inputs: Input tensor.
      kernel_size: An integer or tuple/list of 2 integers.

    Returns:
      A tuple of two `(before, after)` padding pairs, one per spatial
      dimension.
    """
    # Index of the first spatial dimension in the backend-reported shape.
    img_dim = 2 if backend.image_data_format() == "channels_first" else 1
    input_size = backend.int_shape(inputs)[img_dim : (img_dim + 2)]
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    if input_size[0] is None:
        adjust = (1, 1)
    else:
        adjust = (
            1 - input_size[0] % 2,
            # The second spatial dimension may be unknown even when the
            # first is static; use the same fallback as the fully unknown
            # case instead of failing on `None % 2`.
            1 if input_size[1] is None else 1 - input_size[1] % 2,
        )
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
    return (
        (correct[0] - adjust[0], correct[0]),
        (correct[1] - adjust[1], correct[1]),
    )
def validate_activation(classifier_activation, weights):
    """Validates that the classifier activation is compatible with the weights.

    The check is skipped entirely when `weights` is `None`.

    Args:
      classifier_activation: str or callable activation function.
      weights: The pretrained weights to load.

    Raises:
      ValueError: if an activation other than `None` or `softmax` is used
        with pretrained weights.
    """
    if weights is None:
        return

    # Resolve the argument so it can be compared with the resolved forms of
    # the two permitted activations.
    classifier_activation = activations.get(classifier_activation)
    permitted = {
        activations.get("softmax"),
        activations.get(None),
    }
    if classifier_activation in permitted:
        return

    raise ValueError(
        "Only `None` and `softmax` activations are allowed "
        "for the `classifier_activation` argument when using "
        "pretrained weights, with `include_top=True`; Received: "
        f"classifier_activation={classifier_activation}"
    )