# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of image ops."""

import functools

import numpy as np

from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.framework import config
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import array_ops_stack
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import cond as tf_cond
from tensorflow.python.ops import control_flow_assert
from tensorflow.python.ops import control_flow_case
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import sort_ops
from tensorflow.python.ops import stateless_random_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops import variables
from tensorflow.python.ops import while_loop
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export

ops.NotDifferentiable('RandomCrop')
# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable('HSVToRGB')
ops.NotDifferentiable('DrawBoundingBoxes')
ops.NotDifferentiable('SampleDistortedBoundingBox')
ops.NotDifferentiable('SampleDistortedBoundingBoxV2')
# TODO(bsteiner): Implement the gradient function for extract_glimpse
# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable('ExtractGlimpse')
ops.NotDifferentiable('NonMaxSuppression')
ops.NotDifferentiable('NonMaxSuppressionV2')
ops.NotDifferentiable('NonMaxSuppressionWithOverlaps')
ops.NotDifferentiable('GenerateBoundingBoxProposals')


# pylint: disable=invalid-name
def _assert(cond, ex_type, msg):
  """A polymorphic assert that works with tensors and boolean expressions.

  If `cond` is not a tensor, behaves like an ordinary assert statement, except
  that an empty list is returned. If `cond` is a tensor, returns a list
  containing a single TensorFlow assert op.

  Args:
    cond: Something that evaluates to a boolean value. May be a tensor.
    ex_type: The exception class to use.
    msg: The error message.

  Returns:
    A list containing at most one assert op.
  """
  if _is_tensor(cond):
    return [control_flow_assert.Assert(cond, [msg])]
  else:
    if not cond:
      raise ex_type(msg)
    else:
      return []
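

# Illustrative sketch (not part of the original module): `_assert` lets the
# same validation code serve both the static and the graph/dynamic path. With
# a Python bool it raises immediately; with a tensor it returns a runtime
# Assert op that callers attach via `with_dependencies`, e.g.:
#
#   deps = _assert(offset_height >= 0, ValueError,
#                  'offset_height must be >= 0')
#   image = control_flow_ops.with_dependencies(deps, image)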


def _is_tensor(x):
  """Returns `True` if `x` is a symbolic tensor-like object.

  Args:
    x: A python object to check.

  Returns:
    `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`.
  """
  return isinstance(x, (ops.Tensor, variables.Variable))


def _ImageDimensions(image, rank):
  """Returns the dimensions of an image tensor.

  Args:
    image: A rank-D Tensor. For 3-D, of shape `[height, width, channels]`.
    rank: The expected rank of the image.

  Returns:
    A list corresponding to the dimensions of the input image. Dimensions that
    are statically known are python integers, otherwise, they are integer
    scalar tensors.
  """
  if image.get_shape().is_fully_defined():
    return image.get_shape().as_list()
  else:
    static_shape = image.get_shape().with_rank(rank).as_list()
    dynamic_shape = array_ops_stack.unstack(array_ops.shape(image), rank)
    return [
        s if s is not None else d for s, d in zip(static_shape, dynamic_shape)
    ]
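

# Illustrative sketch (hypothetical shapes, not part of the original module):
# for an image whose height is static but whose width is only known at run
# time, `_ImageDimensions` mixes python ints with scalar tensors:
#
#   # image has static shape [128, None, 3]
#   height, width, channels = _ImageDimensions(image, rank=3)
#   # height == 128 (int), width is a scalar int32 Tensor, channels == 3 (int)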


def _Check3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.

  Args:
    image: 3-D Tensor of shape [height, width, channels]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    image_shape = image.get_shape().with_rank(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be three-dimensional." %
                     image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' (shape %s) must be fully defined." % image_shape)
  if any(x == 0 for x in image_shape):
    raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape)
  if not image_shape.is_fully_defined():
    return [
        check_ops.assert_positive(
            array_ops.shape(image),
            ["all dims of 'image.shape' "
             'must be > 0.'])
    ]
  else:
    return []


def _Assert3DImage(image):
  """Assert that we are working with a properly shaped image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: 3-D Tensor of shape [height, width, channels]

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)


def _AssertAtLeast3DImage(image):
  """Assert that we are working with a properly shaped image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]

  Raises:
    ValueError: if image.shape is not a [>= 3] vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _CheckAtLeast3DImage(image, require_static=False), image)


def _CheckAtLeast3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if image.shape is not a [>= 3] vector.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    if image.get_shape().ndims is None:
      image_shape = image.get_shape().with_rank(3)
    else:
      image_shape = image.get_shape().with_rank_at_least(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be at least three-dimensional." %
                     image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' must be fully defined.")
  if any(x == 0 for x in image_shape[-3:]):
    raise ValueError("inner 3 dims of 'image.shape' must be > 0: %s" %
                     image_shape)
  if not image_shape[-3:].is_fully_defined():
    return [
        check_ops.assert_positive(
            array_ops.shape(image)[-3:],
            ["inner 3 dims of 'image.shape' "
             'must be > 0.']),
        check_ops.assert_greater_equal(
            array_ops.rank(image),
            3,
            message="'image' must be at least three-dimensional.")
    ]
  else:
    return []
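

# Illustrative sketch (not part of the original module): when the inner dims
# are static, the checks above raise eagerly at graph-construction time; when
# they are unknown, the caller receives runtime assert ops instead:
#
#   ops_list = _CheckAtLeast3DImage(image, require_static=False)
#   image = control_flow_ops.with_dependencies(ops_list, image)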


def _AssertGrayscaleImage(image):
  """Assert that we are working with a properly shaped grayscale image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: >= 2-D Tensor of size [*, 1]

  Raises:
    ValueError: if image.shape is not a [>= 2] vector or if
      last dimension is not size 1.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _CheckGrayscaleImage(image, require_static=False), image)


def _CheckGrayscaleImage(image, require_static=True):
  """Assert that we are working with a properly shaped grayscale image.

  Args:
    image: >= 2-D Tensor of size [*, 1]
    require_static: Boolean, whether static shape is required.

  Raises:
    ValueError: if image.shape is not a [>= 2] vector or if
      last dimension is not size 1.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    if image.get_shape().ndims is None:
      image_shape = image.get_shape().with_rank(2)
    else:
      image_shape = image.get_shape().with_rank_at_least(2)
  except ValueError:
    raise ValueError('A grayscale image (shape %s) must be at least '
                     'two-dimensional.' % image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' must be fully defined.")
  if image_shape.is_fully_defined():
    if image_shape[-1] != 1:
      raise ValueError('Last dimension of a grayscale image should be size 1.')
  if not image_shape.is_fully_defined():
    return [
        check_ops.assert_equal(
            array_ops.shape(image)[-1],
            1,
            message='Last dimension of a grayscale image should be size 1.'),
        check_ops.assert_greater_equal(
            array_ops.rank(image),
            2,
            message='A grayscale image must be at least two-dimensional.')
    ]
  else:
    return []


def fix_image_flip_shape(image, result):
  """Sets the shape to 3 dimensional if we don't know anything else.

  Args:
    image: The original image tensor.
    result: The flipped or transformed image tensor.

  Returns:
    An image whose shape is at least (None, None, None).
  """
  image_shape = image.get_shape()
  if image_shape == tensor_shape.unknown_shape():
    result.set_shape([None, None, None])
  else:
    result.set_shape(image_shape)
  return result


@tf_export('image.random_flip_up_down')
@dispatch.add_dispatch_support
def random_flip_up_down(image, seed=None):
  """Randomly flips an image vertically (upside down).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the first
  dimension, which is `height`. Otherwise, outputs the image as-is.
  When passing a batch of images, each image will be randomly flipped
  independently of the other images.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_up_down(image, 3).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ...     [
  ...         [[[1], [2]], [[3], [4]]],
  ...         [[[5], [6]], [[7], [8]]]
  ...     ])
  >>> tf.image.random_flip_up_down(images, 4).numpy().tolist()
  [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]]

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. tf.random.set_seed).

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  random_func = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(image, 0, random_func, 'random_flip_up_down')


@tf_export('image.random_flip_left_right')
@dispatch.add_dispatch_support
def random_flip_left_right(image, seed=None):
  """Randomly flip an image horizontally (left to right).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the
  second dimension, which is `width`. Otherwise outputs the image as-is.
  When passing a batch of images, each image will be randomly flipped
  independently of the other images.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_left_right(image, 5).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ...     [
  ...         [[[1], [2]], [[3], [4]]],
  ...         [[[5], [6]], [[7], [8]]]
  ...     ])
  >>> tf.image.random_flip_left_right(images, 6).numpy().tolist()
  [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]]

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. tf.random.set_seed).

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  random_func = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(image, 1, random_func, 'random_flip_left_right')


@tf_export('image.stateless_random_flip_left_right', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_left_right(image, seed):
  """Randomly flip an image horizontally (left to right) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  random_func = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image, 1, random_func, 'stateless_random_flip_left_right')


@tf_export('image.stateless_random_flip_up_down', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_up_down(image, seed):
  """Randomly flip an image vertically (upside down) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  random_func = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image, 0, random_func, 'stateless_random_flip_up_down')


def _random_flip(image, flip_index, random_func, scope_name):
  """Randomly (50% chance) flip an image along axis `flip_index`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: Dimension along which to flip the image.
      Vertical is 0, Horizontal is 1.
    random_func: partial function for calling either stateful or stateless
      random ops with `seed` parameter specified.
    scope_name: Name of the scope in which the ops are added.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(None, scope_name, [image]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      uniform_random = random_func(shape=[], minval=0, maxval=1.0)
      mirror_cond = math_ops.less(uniform_random, .5)
      result = tf_cond.cond(
          mirror_cond,
          lambda: array_ops.reverse(image, [flip_index]),
          lambda: image,
          name=scope)
      return fix_image_flip_shape(image, result)

    def f_rank4():
      batch_size = array_ops.shape(image)[0]
      uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0)
      flips = math_ops.round(
          array_ops.reshape(uniform_random, [batch_size, 1, 1, 1]))
      flips = math_ops.cast(flips, image.dtype)
      flipped_input = array_ops.reverse(image, [flip_index + 1])
      return flips * flipped_input + (1 - flips) * image

    if shape.ndims is None:
      rank = array_ops.rank(image)
      return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    if shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." % shape)
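

# Illustrative sketch (not part of the original module): the rank-4 branch
# avoids a per-image cond by blending instead. `flips` is a [batch, 1, 1, 1]
# tensor of 0s and 1s, so each image independently selects either its flipped
# or its original version:
#
#   flips = math_ops.round(
#       array_ops.reshape(uniform_random, [batch_size, 1, 1, 1]))
#   result = flips * flipped_input + (1 - flips) * image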


@tf_export('image.flip_left_right')
@dispatch.add_dispatch_support
def flip_left_right(image):
  """Flip an image horizontally (left to right).

  Outputs the contents of `image` flipped along the width dimension.

  See also `tf.reverse`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_left_right(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 4.,  5.,  6.],
          [ 1.,  2.,  3.]],
         [[10., 11., 12.],
          [ 7.,  8.,  9.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  return _flip(image, 1, 'flip_left_right')


@tf_export('image.flip_up_down')
@dispatch.add_dispatch_support
def flip_up_down(image):
  """Flip an image vertically (upside down).

  Outputs the contents of `image` flipped along the height dimension.

  See also `reverse()`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_up_down(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 7.,  8.,  9.],
          [10., 11., 12.]],
         [[ 1.,  2.,  3.],
          [ 4.,  5.,  6.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  return _flip(image, 0, 'flip_up_down')


def _flip(image, flip_index, scope_name):
  """Flip an image either horizontally or vertically.

  Outputs the contents of `image` flipped along the dimension `flip_index`.

  See also `reverse()`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: 0 for vertical, 1 for horizontal.
    scope_name: string, scope name.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(None, scope_name, [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index]))

    def f_rank4():
      return array_ops.reverse(image, [flip_index + 1])

    if shape.ndims is None:
      rank = array_ops.rank(image)
      return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." % shape)


@tf_export('image.rot90')
@dispatch.add_dispatch_support
def rot90(image, k=1, name=None):
  """Rotate image(s) counter-clockwise by 90 degrees.

  For example:

  >>> a = tf.constant([[[1],[2]],
  ...                  [[3],[4]]])
  >>> # rotating `a` counter clockwise by 90 degrees
  >>> a_rot = tf.image.rot90(a)
  >>> print(a_rot[...,0].numpy())
  [[2 4]
   [1 3]]
  >>> # rotating `a` counter clockwise by 270 degrees
  >>> a_rot = tf.image.rot90(a, k=3)
  >>> print(a_rot[...,0].numpy())
  [[3 1]
   [4 2]]
  >>> # rotating `a` clockwise by 180 degrees
  >>> a_rot = tf.image.rot90(a, k=-2)
  >>> print(a_rot[...,0].numpy())
  [[4 3]
   [2 1]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    k: A scalar integer tensor. The number of times the image(s) are rotated by
      90 degrees.
    name: A name for this operation (optional).

  Returns:
    A rotated tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    k = math_ops.mod(k, 4)

    shape = image.get_shape()
    if shape.ndims is None:
      rank = array_ops.rank(image)

      def f_rank3():
        return _rot90_3D(image, k, scope)

      def f_rank4():
        return _rot90_4D(image, k, scope)

      return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return _rot90_3D(image, k, scope)
    elif shape.ndims == 4:
      return _rot90_4D(image, k, scope)
    else:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." % shape)


def _rot90_3D(image, k, name_scope):
  """Rotate image counter-clockwise by 90 degrees `k` times.

  Args:
    image: 3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 3-D tensor of the same type and shape as `image`.
  """

  def _rot90():
    return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2])

  def _rot180():
    return array_ops.reverse_v2(image, [0, 1])

  def _rot270():
    return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1])

  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
           (math_ops.equal(k, 3), _rot270)]

  result = control_flow_case.case(
      cases, default=lambda: image, exclusive=True, name=name_scope)
  result.set_shape([None, None, image.get_shape()[2]])
  return result
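

# Illustrative sketch (not part of the original module): each 90-degree
# counter-clockwise rotation decomposes into a width reversal plus a swap of
# the spatial axes. In NumPy terms, for a hypothetical array `a` of shape
# [height, width, channels]:
#
#   np.transpose(a[:, ::-1, :], [1, 0, 2])  # equivalent to _rot90() above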


def _rot90_4D(images, k, name_scope):
  """Rotate batch of images counter-clockwise by 90 degrees `k` times.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`.
    k: A scalar integer. The number of times the images are rotated by 90
      degrees.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 4-D `Tensor` of the same type and shape as `images`.
  """

  def _rot90():
    return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3])

  def _rot180():
    return array_ops.reverse_v2(images, [1, 2])

  def _rot270():
    return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2])

  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
           (math_ops.equal(k, 3), _rot270)]

  result = control_flow_case.case(
      cases, default=lambda: images, exclusive=True, name=name_scope)
  shape = result.get_shape()
  result.set_shape([shape[0], None, None, shape[3]])
  return result


@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image'])
@dispatch.add_dispatch_support
def transpose(image, name=None):
  """Transpose image(s) by swapping the height and width dimension.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.transpose(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.,  2.,  3.],
          [ 7.,  8.,  9.]],
         [[ 4.,  5.,  6.],
          [10., 11., 12.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    name: A name for this operation (optional).

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, width, height, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[width, height, channels]`.

  Raises:
    ValueError: if the shape of `image` is not supported.

  Usage Example:

  >>> image = [[[1, 2], [3, 4]],
  ...          [[5, 6], [7, 8]],
  ...          [[9, 10], [11, 12]]]
  >>> image = tf.constant(image)
  >>> tf.image.transpose(image)
  <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy=
  array([[[ 1,  2],
          [ 5,  6],
          [ 9, 10]],
         [[ 3,  4],
          [ 7,  8],
          [11, 12]]], dtype=int32)>
  """
  with ops.name_scope(name, 'transpose', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()
    if shape.ndims is None:
      rank = array_ops.rank(image)

      def f_rank3():
        return array_ops.transpose(image, [1, 0, 2], name=name)

      def f_rank4():
        return array_ops.transpose(image, [0, 2, 1, 3], name=name)

      return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return array_ops.transpose(image, [1, 0, 2], name=name)
    elif shape.ndims == 4:
      return array_ops.transpose(image, [0, 2, 1, 3], name=name)
    else:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." % shape)


@tf_export('image.central_crop')
@dispatch.add_dispatch_support
def central_crop(image, central_fraction):
  """Crop the central region of the image(s).

  Remove the outer parts of an image but retain the central region of the image
  along each dimension. If we specify `central_fraction = 0.5`, this function
  returns the region marked with "X" in the below diagram. The larger the value
  of `central_fraction`, the larger the dimension of the region to be cropped
  and retained.

       --------
      |        |
      |  XXXX  |
      |  XXXX  |
      |        |   where "X" is the central 50% of the image.
       --------

  This function works on either a single image (`image` is a 3-D Tensor), or a
  batch of images (`image` is a 4-D Tensor).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0],
  ...       [7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]],
  ...      [[13.0, 14.0, 15.0],
  ...       [16.0, 17.0, 18.0],
  ...       [19.0, 20.0, 21.0],
  ...       [22.0, 23.0, 24.0]],
  ...      [[25.0, 26.0, 27.0],
  ...       [28.0, 29.0, 30.0],
  ...       [31.0, 32.0, 33.0],
  ...       [34.0, 35.0, 36.0]],
  ...      [[37.0, 38.0, 39.0],
  ...       [40.0, 41.0, 42.0],
  ...       [43.0, 44.0, 45.0],
  ...       [46.0, 47.0, 48.0]]]
  >>> tf.image.central_crop(x, 0.5)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[16., 17., 18.],
          [19., 20., 21.]],
         [[28., 29., 30.],
          [31., 32., 33.]]], dtype=float32)>

  Args:
    image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D
      Tensor of shape [batch_size, height, width, depth].
    central_fraction: float (0, 1], fraction of size to crop

  Raises:
    ValueError: if `central_fraction` is not within (0, 1].

  Returns:
    3-D / 4-D float Tensor, as per the input.
  """
  with ops.name_scope(None, 'central_crop', [image]):
    image = ops.convert_to_tensor(image, name='image')
    central_fraction_static = tensor_util.constant_value(central_fraction)
    if central_fraction_static is not None:
      if central_fraction_static <= 0.0 or central_fraction_static > 1.0:
        raise ValueError('central_fraction must be within (0, 1]')
      if central_fraction_static == 1.0:
        return image
    else:
      assert_ops = _assert(
          math_ops.logical_and(central_fraction > 0.0,
                               central_fraction <= 1.0),
          ValueError, 'central_fraction must be within (0, 1]')
      image = control_flow_ops.with_dependencies(assert_ops, image)

    _AssertAtLeast3DImage(image)
    rank = image.get_shape().ndims
    if rank != 3 and rank != 4:
      raise ValueError('`image` should either be a Tensor with rank = 3 or '
                       'rank = 4. Had rank = {}.'.format(rank))

    # Helper method to return the `idx`-th dimension of `tensor`, along with
    # a boolean signifying if the dimension is dynamic.
    def _get_dim(tensor, idx):
      static_shape = tensor.get_shape().dims[idx].value
      if static_shape is not None:
        return static_shape, False
      return array_ops.shape(tensor)[idx], True

    # Get the height, width, depth (and batch size, if the image is a 4-D
    # tensor).
    if rank == 3:
      img_h, dynamic_h = _get_dim(image, 0)
      img_w, dynamic_w = _get_dim(image, 1)
      img_d = image.get_shape()[2]
    else:
      img_bs = image.get_shape()[0]
      img_h, dynamic_h = _get_dim(image, 1)
      img_w, dynamic_w = _get_dim(image, 2)
      img_d = image.get_shape()[3]

    dynamic_h = dynamic_h or (central_fraction_static is None)
    dynamic_w = dynamic_w or (central_fraction_static is None)

    # Compute the bounding boxes for the crop. The type and value of the
    # bounding boxes depend on the `image` tensor's rank and whether or not the
    # dimensions are statically defined.
    if dynamic_h:
      img_hd = math_ops.cast(img_h, dtypes.float64)
      bbox_h_start = math_ops.cast(
          (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) /
          2, dtypes.int32)
    else:
      img_hd = float(img_h)
      bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2)

    if dynamic_w:
      img_wd = math_ops.cast(img_w, dtypes.float64)
      bbox_w_start = math_ops.cast(
          (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) /
          2, dtypes.int32)
    else:
      img_wd = float(img_w)
      bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2)

    bbox_h_size = img_h - bbox_h_start * 2
    bbox_w_size = img_w - bbox_w_start * 2

    if rank == 3:
      bbox_begin = array_ops_stack.stack([bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops_stack.stack([bbox_h_size, bbox_w_size, -1])
    else:
      bbox_begin = array_ops_stack.stack([0, bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops_stack.stack([-1, bbox_h_size, bbox_w_size, -1])

    image = array_ops.slice(image, bbox_begin, bbox_size)

    # Reshape the `image` tensor to the desired size.
    if rank == 3:
      image.set_shape([
          None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size, img_d
      ])
    else:
      image.set_shape([
          img_bs, None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size, img_d
      ])
    return image
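

# Illustrative sketch (hypothetical numbers, not part of the original module):
# for a 100-pixel-tall image and central_fraction = 0.5, the static branch
# keeps the middle half of the rows:
#
#   bbox_h_start = int((100 - 100 * 0.5) / 2)  # 25
#   bbox_h_size = 100 - bbox_h_start * 2       # 50 rows retained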


@tf_export('image.pad_to_bounding_box')
@dispatch.add_dispatch_support
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width):
  """Pad `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Usage Example:

  >>> x = [[[1., 2., 3.],
  ...       [4., 5., 6.]],
  ...      [[7., 8., 9.],
  ...       [10., 11., 12.]]]
  >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4)
  >>> padded_image
  <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy=
  array([[[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 1.,  2.,  3.],
          [ 4.,  5.,  6.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 7.,  8.,  9.],
          [10., 11., 12.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`.

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative.
  """
  return pad_to_bounding_box_internal(
      image,
      offset_height,
      offset_width,
      target_height,
      target_width,
      check_dims=True)


# TODO(b/190099338) Remove this internal method and remap call sites to call
# image_ops.pad_to_bounding_box when asserts are no longer serialized. See also
# b/204377079#comment6 for more context.
def pad_to_bounding_box_internal(image, offset_height, offset_width,
                                 target_height, target_width, check_dims):
  """Pad `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.
    check_dims: If `True`, assert that dimensions are non-negative and in range.
      In multi-GPU distributed settings, assertions can cause program slowdown.
      Setting this parameter to `False` avoids this, resulting in faster speed
      in some situations, with the tradeoff being that some error checking is
      not happening.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`.

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative. Not raised if `check_dims` is `False`.
  """
  with ops.name_scope(None, 'pad_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    is_batch = True
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." %
          image_shape)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    after_padding_width = target_width - offset_width - width

    after_padding_height = target_height - offset_height - height

    if check_dims:
      assert_ops = _CheckAtLeast3DImage(image, require_static=False)
      assert_ops += _assert(offset_height >= 0, ValueError,
                            'offset_height must be >= 0')
      assert_ops += _assert(offset_width >= 0, ValueError,
                            'offset_width must be >= 0')
      assert_ops += _assert(after_padding_width >= 0, ValueError,
                            'width must be <= target - offset')
      assert_ops += _assert(after_padding_height >= 0, ValueError,
                            'height must be <= target - offset')
      image = control_flow_ops.with_dependencies(assert_ops, image)

    # Do not pad on the depth dimensions.
    paddings = array_ops.reshape(
        array_ops_stack.stack([
            0, 0, offset_height, after_padding_height, offset_width,
            after_padding_width, 0, 0
        ]), [4, 2])
    padded = array_ops.pad(image, paddings)

    padded_shape = [
        None if _is_tensor(i) else i
        for i in [batch, target_height, target_width, depth]
    ]
    padded.set_shape(padded_shape)

    if not is_batch:
      padded = array_ops.squeeze(padded, axis=[0])

    return padded
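

# Illustrative sketch (not part of the original module): the [4, 2] `paddings`
# tensor built above holds (before, after) padding amounts per axis of the
# rank-4 image, with no padding on batch or depth:
#
#   paddings = [[0, 0],                                  # batch
#               [offset_height, after_padding_height],   # height
#               [offset_width, after_padding_width],     # width
#               [0, 0]]                                  # depth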


@tf_export('image.crop_to_bounding_box')
@dispatch.add_dispatch_support
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an `image` to a specified bounding box.

  This op cuts a rectangular bounding box out of `image`. The top-left corner
  of the bounding box is at `offset_height, offset_width` in `image`, and the
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Example Usage:

  >>> image = tf.constant(np.arange(1, 28, dtype=np.float32), shape=[3, 3, 3])
  >>> image[:,:,0]  # print the first channel of the 3-D tensor
  <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
  array([[ 1.,  4.,  7.],
         [10., 13., 16.],
         [19., 22., 25.]], dtype=float32)>
  >>> cropped_image = tf.image.crop_to_bounding_box(image, 0, 0, 2, 2)
  >>> cropped_image[:,:,0]  # print the first channel of the cropped 3-D tensor
  <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
  array([[ 1.,  4.],
         [10., 13.]], dtype=float32)>

  Args:
    image: 4-D `Tensor` of shape `[batch, height, width, channels]` or 3-D
      `Tensor` of shape `[height, width, channels]`.
    offset_height: Vertical coordinate of the top-left corner of the bounding
      box in `image`.
    offset_width: Horizontal coordinate of the top-left corner of the bounding
      box in `image`.
    target_height: Height of the bounding box.
    target_width: Width of the bounding box.

  Returns:
    If `image` was 4-D, a 4-D `Tensor` of shape
    `[batch, target_height, target_width, channels]`.
    If `image` was 3-D, a 3-D `Tensor` of shape
    `[target_height, target_width, channels]`.
    It has the same dtype as `image`.

  Raises:
    ValueError: `image` is not a 3-D or 4-D `Tensor`.
    ValueError: `offset_width < 0` or `offset_height < 0`.
    ValueError: `target_width <= 0` or `target_height <= 0`.
    ValueError: `width < offset_width + target_width` or
      `height < offset_height + target_height`.
  """
  with ops.name_scope(None, 'crop_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    is_batch = True
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    assert_ops += _assert(offset_width >= 0, ValueError,
                          'offset_width must be >= 0.')
    assert_ops += _assert(offset_height >= 0, ValueError,
                          'offset_height must be >= 0.')
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')
    assert_ops += _assert(width >= (target_width + offset_width), ValueError,
                          'width must be >= target + offset.')
    assert_ops += _assert(height >= (target_height + offset_height),
                          ValueError, 'height must be >= target + offset.')
    image = control_flow_ops.with_dependencies(assert_ops, image)

    cropped = array_ops.slice(
        image,
        array_ops_stack.stack([0, offset_height, offset_width, 0]),
        array_ops_stack.stack([
            array_ops.shape(image)[0],
            target_height,
            target_width,
            array_ops.shape(image)[3]
        ]))

    cropped_shape = [
        None if _is_tensor(i) else i
        for i in [batch, target_height, target_width, depth]
    ]
    cropped.set_shape(cropped_shape)

    if not is_batch:
      cropped = array_ops.squeeze(cropped, axis=[0])

    return cropped
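

# Illustrative sketch (not part of the original module): the crop above is a
# single `slice` whose begin/size vectors leave the batch and depth axes
# untouched:
#
#   begin = [0, offset_height, offset_width, 0]
#   size = [batch, target_height, target_width, depth]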


@tf_export(
    'image.resize_with_crop_or_pad',
    v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad'])
@dispatch.add_dispatch_support
def resize_image_with_crop_or_pad(image, target_height, target_width):
  """Crops and/or pads an image to a target width and height.

  Resizes an image to a target width and height by either centrally
  cropping the image or padding it evenly with zeros.

  If `width` or `height` is greater than the specified `target_width` or
  `target_height` respectively, this op centrally crops along that dimension.

  For example:

  >>> image = np.arange(75).reshape(5, 5, 3)  # create 3-D image input
  >>> image[:,:,0]  # print first channel just for demo purposes
  array([[ 0,  3,  6,  9, 12],
         [15, 18, 21, 24, 27],
         [30, 33, 36, 39, 42],
         [45, 48, 51, 54, 57],
         [60, 63, 66, 69, 72]])
  >>> image = tf.image.resize_with_crop_or_pad(image, 3, 3)  # crop
  >>> # print first channel for demo purposes; centrally cropped output
  >>> image[:,:,0]
  <tf.Tensor: shape=(3, 3), dtype=int64, numpy=
  array([[18, 21, 24],
         [33, 36, 39],
         [48, 51, 54]])>

  If `width` or `height` is smaller than the specified `target_width` or
  `target_height` respectively, this op centrally pads with 0 along that
  dimension.

  For example:

  >>> image = np.arange(1, 28).reshape(3, 3, 3)  # create 3-D image input
  >>> image[:,:,0]  # print first channel just for demo purposes
  array([[ 1,  4,  7],
         [10, 13, 16],
         [19, 22, 25]])
  >>> image = tf.image.resize_with_crop_or_pad(image, 5, 5)  # pad
  >>> # print first channel for demo purposes; we should see 0 paddings
  >>> image[:,:,0]
  <tf.Tensor: shape=(5, 5), dtype=int64, numpy=
  array([[ 0,  0,  0,  0,  0],
         [ 0,  1,  4,  7,  0],
         [ 0, 10, 13, 16,  0],
         [ 0, 19, 22, 25,  0],
         [ 0,  0,  0,  0,  0]])>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Cropped and/or padded image.
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image_shape = image.get_shape()
    is_batch = True
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')

    image = control_flow_ops.with_dependencies(assert_ops, image)
    # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
    # Make sure our checks come first, so that error messages are clearer.
    if _is_tensor(target_height):
      target_height = control_flow_ops.with_dependencies(
          assert_ops, target_height)
    if _is_tensor(target_width):
      target_width = control_flow_ops.with_dependencies(assert_ops,
                                                        target_width)

    def max_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.maximum(x, y)
      else:
        return max(x, y)

    def min_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.minimum(x, y)
      else:
        return min(x, y)

    def equal_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.equal(x, y)
      else:
        return x == y

    _, height, width, _ = _ImageDimensions(image, rank=4)
    width_diff = target_width - width
    offset_crop_width = max_(-width_diff // 2, 0)
    offset_pad_width = max_(width_diff // 2, 0)

    height_diff = target_height - height
    offset_crop_height = max_(-height_diff // 2, 0)
    offset_pad_height = max_(height_diff // 2, 0)

    # Maybe crop if needed.
    cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
                                   min_(target_height, height),
                                   min_(target_width, width))

    # Maybe pad if needed.
    resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
                                  target_height, target_width)

    # In theory all the checks below are redundant.
    if resized.get_shape().ndims is None:
      raise ValueError('resized contains no shape.')

    _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)

    assert_ops = []
    assert_ops += _assert(
        equal_(resized_height, target_height), ValueError,
        'resized height is not correct.')
    assert_ops += _assert(
        equal_(resized_width, target_width), ValueError,
        'resized width is not correct.')

    resized = control_flow_ops.with_dependencies(assert_ops, resized)

    if not is_batch:
      resized = array_ops.squeeze(resized, axis=[0])

    return resized
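

# Illustrative sketch (hypothetical numbers, not part of the original module):
# the signed difference between the target and current sizes drives both
# cases. Resizing a width-5 image to width 3 crops; to width 9 it pads:
#
#   width_diff = 3 - 5                       # -2: crop case
#   offset_crop_width = max(-(-2) // 2, 0)   # 1 column trimmed per side
#   width_diff = 9 - 5                       # +4: pad case
#   offset_pad_width = max(4 // 2, 0)        # 2 zero columns on the left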


@tf_export(v1=['image.ResizeMethod'])
class ResizeMethodV1:
  """See `v1.image.resize` for details."""
  BILINEAR = 0
  NEAREST_NEIGHBOR = 1
  BICUBIC = 2
  AREA = 3


@tf_export('image.ResizeMethod', v1=[])
class ResizeMethod:
  """See `tf.image.resize` for details."""
  BILINEAR = 'bilinear'
  NEAREST_NEIGHBOR = 'nearest'
  BICUBIC = 'bicubic'
  AREA = 'area'
  LANCZOS3 = 'lanczos3'
  LANCZOS5 = 'lanczos5'
  GAUSSIAN = 'gaussian'
  MITCHELLCUBIC = 'mitchellcubic'


def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name,
                          skip_resize_if_same):
  """Core functionality for v1 and v2 resize functions."""
  with ops.name_scope(name, 'resize', [images, size]):
    images = ops.convert_to_tensor(images, name='images')
    if images.get_shape().ndims is None:
      raise ValueError("'images' contains no shape.")
    # TODO(shlens): Migrate this functionality to the underlying Op's.
    is_batch = True
    if images.get_shape().ndims == 3:
      is_batch = False
      images = array_ops.expand_dims(images, 0)
    elif images.get_shape().ndims != 4:
      raise ValueError("'images' must have either 3 or 4 dimensions.")

    _, height, width, _ = images.get_shape().as_list()

    try:
      size = ops.convert_to_tensor(size, dtypes.int32, name='size')
    except (TypeError, ValueError):
      raise ValueError("'size' must be a 1-D int32 Tensor")
    if not size.get_shape().is_compatible_with([2]):
      raise ValueError("'size' must be a 1-D Tensor of 2 elements: "
                       'new_height, new_width')

    if preserve_aspect_ratio:
      # Get the current shapes of the image, even if dynamic.
      _, current_height, current_width, _ = _ImageDimensions(images, rank=4)

      # do the computation to find the right scale and height/width.
      scale_factor_height = (
          math_ops.cast(size[0], dtypes.float32) /
          math_ops.cast(current_height, dtypes.float32))
      scale_factor_width = (
          math_ops.cast(size[1], dtypes.float32) /
          math_ops.cast(current_width, dtypes.float32))
      scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width)
      scaled_height_const = math_ops.cast(
          math_ops.round(scale_factor *
                         math_ops.cast(current_height, dtypes.float32)),
          dtypes.int32)
      scaled_width_const = math_ops.cast(
          math_ops.round(scale_factor *
                         math_ops.cast(current_width, dtypes.float32)),
          dtypes.int32)

      # NOTE: Reset the size and other constants used later.
      size = ops.convert_to_tensor([scaled_height_const, scaled_width_const],
                                   dtypes.int32,
                                   name='size')

    size_const_as_shape = tensor_util.constant_value_as_shape(size)
    new_height_const = tensor_shape.dimension_at_index(size_const_as_shape,
                                                       0).value
    new_width_const = tensor_shape.dimension_at_index(size_const_as_shape,
                                                      1).value

    # If we can determine that the height and width will be unmodified by this
    # transformation, we avoid performing the resize.
    if skip_resize_if_same and all(
        x is not None
        for x in [new_width_const, width, new_height_const, height]) and (
            width == new_width_const and height == new_height_const):
      if not is_batch:
        images = array_ops.squeeze(images, axis=[0])
      return images

    images = resizer_fn(images, size)

    # NOTE(mrry): The shape functions for the resize ops cannot unpack
    # the packed values in `new_size`, so set the shape here.
    images.set_shape([None, new_height_const, new_width_const, None])

    if not is_batch:
      images = array_ops.squeeze(images, axis=[0])
    return images
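

# Illustrative sketch (hypothetical numbers, not part of the original module):
# with preserve_aspect_ratio=True, the smaller of the two scale factors wins,
# so the result always fits inside `size`. E.g. a 5x5 image and size [10, 20]:
#
#   scale_factor = min(10 / 5, 20 / 5)              # 2.0
#   scaled_size = (round(2.0 * 5), round(2.0 * 5))  # (10, 10)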


@tf_export(v1=['image.resize_images', 'image.resize'])
@dispatch.add_dispatch_support
def resize_images(images,
                  size,
                  method=ResizeMethodV1.BILINEAR,
                  align_corners=False,
                  preserve_aspect_ratio=False,
                  name=None):
  """Resize `images` to `size` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `size`. To avoid distortions see
  `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`.

  The `method` can be one of:

  *   <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.](
    https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [
    Nearest neighbor interpolation.](
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.](
    https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation.

  The return value has the same type as `images` if `method` is
  `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type
  as `images` if the size of `images` can be statically determined to be the
  same as `size`, because `images` is returned in this case. Otherwise, the
  return value has type `float32`.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D
      Tensor of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new
      size for the images.
    method: ResizeMethod. Defaults to `tf.image.ResizeMethod.BILINEAR`.
    align_corners: bool. If True, the centers of the 4 corner pixels of the
      input and output tensors are aligned, preserving the values at the corner
      pixels. Defaults to `False`.
    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
      then `images` will be resized to a size that fits in `size` while
      preserving the aspect ratio of the original image. Scales up the image if
      `size` is bigger than the current size of the `image`. Defaults to False.
    name: A name for this operation (optional).

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function.
    ValueError: if `size` has an invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def resize_fn(images_t, new_size):
    """Legacy resize core function, passed to _resize_images_common."""
    if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR:
      return gen_image_ops.resize_bilinear(
          images_t, new_size, align_corners=align_corners)
    elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or
          method == ResizeMethod.NEAREST_NEIGHBOR):
      return gen_image_ops.resize_nearest_neighbor(
          images_t, new_size, align_corners=align_corners)
    elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC:
      return gen_image_ops.resize_bicubic(
          images_t, new_size, align_corners=align_corners)
    elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA:
      return gen_image_ops.resize_area(
          images_t, new_size, align_corners=align_corners)
    else:
      raise ValueError('Resize method is not implemented: {}'.format(method))

  return _resize_images_common(
      images,
      resize_fn,
      size,
      preserve_aspect_ratio=preserve_aspect_ratio,
      name=name,
      skip_resize_if_same=True)
1610@tf_export('image.resize', v1=[])
1611@dispatch.add_dispatch_support
1612def resize_images_v2(images,
1613 size,
1614 method=ResizeMethod.BILINEAR,
1615 preserve_aspect_ratio=False,
1616 antialias=False,
1617 name=None):
1618 """Resize `images` to `size` using the specified `method`.
1620 Resized images will be distorted if their original aspect ratio is not
1621 the same as `size`. To avoid distortions see
1622 `tf.image.resize_with_pad`.
1624 >>> image = tf.constant([
1625 ... [1,0,0,0,0],
1626 ... [0,1,0,0,0],
1627 ... [0,0,1,0,0],
1628 ... [0,0,0,1,0],
1629 ... [0,0,0,0,1],
1630 ... ])
1631 >>> # Add "batch" and "channels" dimensions
1632 >>> image = image[tf.newaxis, ..., tf.newaxis]
1633 >>> image.shape.as_list() # [batch, height, width, channels]
1634 [1, 5, 5, 1]
1635 >>> tf.image.resize(image, [3,5])[0,...,0].numpy()
1636 array([[0.6666667, 0.3333333, 0. , 0. , 0. ],
1637 [0. , 0. , 1. , 0. , 0. ],
1638 [0. , 0. , 0. , 0.3333335, 0.6666665]],
1639 dtype=float32)
1641 It works equally well with a single image instead of a batch of images:
1643 >>> tf.image.resize(image[0], [3,5]).shape.as_list()
1644 [3, 5, 1]
1646 When `antialias` is true, the sampling filter will anti-alias the input image
1647 as well as interpolate. When downsampling an image with [anti-aliasing](
1648 https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter
1649 kernel is scaled in order to properly anti-alias the input image signal.
1650 `antialias` has no effect when upsampling an image:
1652 >>> a = tf.image.resize(image, [5,10])
1653 >>> b = tf.image.resize(image, [5,10], antialias=True)
1654 >>> tf.reduce_max(abs(a - b)).numpy()
1655 0.0
1657 The `method` argument expects an item from the `image.ResizeMethod` enum, or
1658 the string equivalent. The options are:
1660 * <b>`bilinear`</b>: [Bilinear interpolation.](
1661 https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is
1662 true, becomes a hat/tent filter function with radius 1 when downsampling.
1663 * <b>`lanczos3`</b>: [Lanczos kernel](
1664 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3.
1665 High-quality practical filter but may have some ringing, especially on
1666 synthetic images.
1667 * <b>`lanczos5`</b>: [Lanczos kernel](
1668 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5.
1669 Very-high-quality filter but may have stronger ringing.
1670 * <b>`bicubic`</b>: [Cubic interpolant](
1671 https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to
1672 Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel,
1673 particularly when upsampling.
1674 * <b>`gaussian`</b>: [Gaussian kernel](
1675 https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3,
1676 sigma = 1.5 / 3.0.
1677 * <b>`nearest`</b>: [Nearest neighbor interpolation.](
1678 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
1679 `antialias` has no effect when used with nearest neighbor interpolation.
1680 * <b>`area`</b>: Anti-aliased resampling with area interpolation.
1681 `antialias` has no effect when used with area interpolation; it
1682 always anti-aliases.
1683 * <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter.
1684 For synthetic images (especially those lacking proper prefiltering), less
1685 ringing than Keys cubic kernel but less sharp.
1687 Note: Near image edges the filtering kernel may be partially outside the
1688 image boundaries. For these pixels, only input pixels inside the image will be
1689 included in the filter sum, and the output value will be appropriately
1690 normalized.
1692 The return value has type `float32`, unless the `method` is
1693 `ResizeMethod.NEAREST_NEIGHBOR`, in which case the return dtype is the dtype
1694 of `images`:
1696 >>> nn = tf.image.resize(image, [5,7], method='nearest')
1697 >>> nn[0,...,0].numpy()
1698 array([[1, 0, 0, 0, 0, 0, 0],
1699 [0, 1, 1, 0, 0, 0, 0],
1700 [0, 0, 0, 1, 0, 0, 0],
1701 [0, 0, 0, 0, 1, 1, 0],
1702 [0, 0, 0, 0, 0, 0, 1]], dtype=int32)
1704 With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size`
1705 is the maximum for each dimension:
1707 >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True)
1708 >>> max_10_20.shape.as_list()
1709 [1, 10, 10, 1]
1711 Args:
1712 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1713 of shape `[height, width, channels]`.
1714 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new
1715 size for the images.
1716 method: An `image.ResizeMethod`, or string equivalent. Defaults to
1717 `bilinear`.
1718 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
1719 then `images` will be resized to a size that fits in `size` while
1720 preserving the aspect ratio of the original image. Scales up the image if
1721 `size` is bigger than the current size of the `image`. Defaults to False.
1722 antialias: Whether to use an anti-aliasing filter when downsampling an
1723 image.
1724 name: A name for this operation (optional).
1726 Raises:
1727 ValueError: if the shape of `images` is incompatible with the
1728 shape arguments to this function.
1729 ValueError: if `size` has an invalid shape or type.
1730 ValueError: if an unsupported resize method is specified.
1732 Returns:
1733 If `images` was 4-D, a 4-D float Tensor of shape
1734 `[batch, new_height, new_width, channels]`.
1735 If `images` was 3-D, a 3-D float Tensor of shape
1736 `[new_height, new_width, channels]`.
1737 """
1739 def resize_fn(images_t, new_size):
1740 """Resize core function, passed to _resize_images_common."""
1741 scale_and_translate_methods = [
1742 ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN,
1743 ResizeMethod.MITCHELLCUBIC
1744 ]
1746 def resize_with_scale_and_translate(method):
1747 scale = (
1748 math_ops.cast(new_size, dtype=dtypes.float32) /
1749 math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32))
1750 return gen_image_ops.scale_and_translate(
1751 images_t,
1752 new_size,
1753 scale,
1754 array_ops.zeros([2]),
1755 kernel_type=method,
1756 antialias=antialias)
1758 if method == ResizeMethod.BILINEAR:
1759 if antialias:
1760 return resize_with_scale_and_translate('triangle')
1761 else:
1762 return gen_image_ops.resize_bilinear(
1763 images_t, new_size, half_pixel_centers=True)
1764 elif method == ResizeMethod.NEAREST_NEIGHBOR:
1765 return gen_image_ops.resize_nearest_neighbor(
1766 images_t, new_size, half_pixel_centers=True)
1767 elif method == ResizeMethod.BICUBIC:
1768 if antialias:
1769 return resize_with_scale_and_translate('keyscubic')
1770 else:
1771 return gen_image_ops.resize_bicubic(
1772 images_t, new_size, half_pixel_centers=True)
1773 elif method == ResizeMethod.AREA:
1774 return gen_image_ops.resize_area(images_t, new_size)
1775 elif method in scale_and_translate_methods:
1776 return resize_with_scale_and_translate(method)
1777 else:
1778 raise ValueError('Resize method is not implemented: {}'.format(method))
1780 return _resize_images_common(
1781 images,
1782 resize_fn,
1783 size,
1784 preserve_aspect_ratio=preserve_aspect_ratio,
1785 name=name,
1786 skip_resize_if_same=False)
1789def _resize_image_with_pad_common(image, target_height, target_width,
1790 resize_fn):
1791 """Core functionality for v1 and v2 resize_image_with_pad functions."""
1792 with ops.name_scope(None, 'resize_image_with_pad', [image]):
1793 image = ops.convert_to_tensor(image, name='image')
1794 image_shape = image.get_shape()
1795 is_batch = True
1796 if image_shape.ndims == 3:
1797 is_batch = False
1798 image = array_ops.expand_dims(image, 0)
1799 elif image_shape.ndims is None:
1800 is_batch = False
1801 image = array_ops.expand_dims(image, 0)
1802 image.set_shape([None] * 4)
1803 elif image_shape.ndims != 4:
1804 raise ValueError(
1805 '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
1806 image_shape)
1808 assert_ops = _CheckAtLeast3DImage(image, require_static=False)
1809 assert_ops += _assert(target_width > 0, ValueError,
1810 'target_width must be > 0.')
1811 assert_ops += _assert(target_height > 0, ValueError,
1812 'target_height must be > 0.')
1814 image = control_flow_ops.with_dependencies(assert_ops, image)
1816 def max_(x, y):
1817 if _is_tensor(x) or _is_tensor(y):
1818 return math_ops.maximum(x, y)
1819 else:
1820 return max(x, y)
1822 _, height, width, _ = _ImageDimensions(image, rank=4)
1824 # convert values to float, to ease divisions
1825 f_height = math_ops.cast(height, dtype=dtypes.float32)
1826 f_width = math_ops.cast(width, dtype=dtypes.float32)
1827 f_target_height = math_ops.cast(target_height, dtype=dtypes.float32)
1828 f_target_width = math_ops.cast(target_width, dtype=dtypes.float32)
1830 # Find the ratio by which the image must be adjusted
1831 # to fit within the target
1832 ratio = max_(f_width / f_target_width, f_height / f_target_height)
1833 resized_height_float = f_height / ratio
1834 resized_width_float = f_width / ratio
1835 resized_height = math_ops.cast(
1836 math_ops.floor(resized_height_float), dtype=dtypes.int32)
1837 resized_width = math_ops.cast(
1838 math_ops.floor(resized_width_float), dtype=dtypes.int32)
1840 padding_height = (f_target_height - resized_height_float) / 2
1841 padding_width = (f_target_width - resized_width_float) / 2
1842 f_padding_height = math_ops.floor(padding_height)
1843 f_padding_width = math_ops.floor(padding_width)
1844 p_height = max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32))
1845 p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32))
1847 # Resize first, then pad to meet requested dimensions
1848 resized = resize_fn(image, [resized_height, resized_width])
1850 padded = pad_to_bounding_box(resized, p_height, p_width, target_height,
1851 target_width)
1853 if padded.get_shape().ndims is None:
1854 raise ValueError('padded contains no shape.')
1856 _ImageDimensions(padded, rank=4)
1858 if not is_batch:
1859 padded = array_ops.squeeze(padded, axis=[0])
1861 return padded
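# Illustrative sketch (not part of the original module): a pure-Python
# restatement of the geometry computed above, so the resize-then-pad
# arithmetic is easy to check by hand. The helper name `_sketch_pad_geometry`
# is hypothetical and only handles static Python ints.
def _sketch_pad_geometry(height, width, target_height, target_width):
  # Shrink (or grow) by the larger overflow ratio so the result fits.
  ratio = max(width / target_width, height / target_height)
  resized_h = int(height / ratio)  # floor, as in the tensor code above
  resized_w = int(width / ratio)
  # Split the leftover space evenly, flooring, and clip at zero.
  pad_top = max(0, int((target_height - height / ratio) // 2))
  pad_left = max(0, int((target_width - width / ratio) // 2))
  return resized_h, resized_w, pad_top, pad_left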
1864@tf_export(v1=['image.resize_image_with_pad'])
1865@dispatch.add_dispatch_support
1866def resize_image_with_pad_v1(image,
1867 target_height,
1868 target_width,
1869 method=ResizeMethodV1.BILINEAR,
1870 align_corners=False):
1871 """Resizes and pads an image to a target width and height.
1873 Resizes an image to a target width and height by keeping
1874 the aspect ratio the same without distortion. If the target
1875 dimensions don't match the image dimensions, the image
1876 is resized and then padded with zeroes to match requested
1877 dimensions.
1879 Args:
1880 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1881 of shape `[height, width, channels]`.
1882 target_height: Target height.
1883 target_width: Target width.
1884 method: Method to use for resizing image. See `resize_images()`.
1885 align_corners: bool. If True, the centers of the 4 corner pixels of the
1886 input and output tensors are aligned, preserving the values at the corner
1887 pixels. Defaults to `False`.
1889 Raises:
1890 ValueError: if `target_height` or `target_width` are zero or negative.
1892 Returns:
1893 Resized and padded image.
1894 If `images` was 4-D, a 4-D float Tensor of shape
1895 `[batch, new_height, new_width, channels]`.
1896 If `images` was 3-D, a 3-D float Tensor of shape
1897 `[new_height, new_width, channels]`.
1898 """
1900 def _resize_fn(im, new_size):
1901 return resize_images(im, new_size, method, align_corners=align_corners)
1903 return _resize_image_with_pad_common(image, target_height, target_width,
1904 _resize_fn)
1907@tf_export('image.resize_with_pad', v1=[])
1908@dispatch.add_dispatch_support
1909def resize_image_with_pad_v2(image,
1910 target_height,
1911 target_width,
1912 method=ResizeMethod.BILINEAR,
1913 antialias=False):
1914 """Resizes and pads an image to a target width and height.
1916 Resizes an image to a target width and height by keeping
1917 the aspect ratio the same without distortion. If the target
1918 dimensions don't match the image dimensions, the image
1919 is resized and then padded with zeroes to match requested
1920 dimensions.
1922 Args:
1923 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1924 of shape `[height, width, channels]`.
1925 target_height: Target height.
1926 target_width: Target width.
1927 method: Method to use for resizing image. See `image.resize()`.
1928 antialias: Whether to use anti-aliasing when resizing. See `image.resize()`.
1930 Raises:
1931 ValueError: if `target_height` or `target_width` are zero or negative.
1933 Returns:
1934 Resized and padded image.
1935 If `images` was 4-D, a 4-D float Tensor of shape
1936 `[batch, new_height, new_width, channels]`.
1937 If `images` was 3-D, a 3-D float Tensor of shape
1938 `[new_height, new_width, channels]`.
1939 """
1941 def _resize_fn(im, new_size):
1942 return resize_images_v2(im, new_size, method, antialias=antialias)
1944 return _resize_image_with_pad_common(image, target_height, target_width,
1945 _resize_fn)
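# Illustrative sketch (not part of the original module): minimal use of the
# public `tf.image.resize_with_pad` endpoint defined above, assuming TF 2.x
# eager execution. The helper name is hypothetical.
def _example_resize_with_pad():
  import tensorflow as tf
  image = tf.ones([4, 8, 3])  # height=4, width=8, RGB
  # The 4x8 image already fits an 8x8 target (ratio 1.0), so it is kept at
  # 4x8 and letterboxed with two rows of zeros above and below.
  out = tf.image.resize_with_pad(image, 8, 8)
  return out.shape  # TensorShape([8, 8, 3])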
1948@tf_export('image.per_image_standardization')
1949@dispatch.add_dispatch_support
1950def per_image_standardization(image):
1951 """Linearly scales each image in `image` to have mean 0 and variance 1.
1953 For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`,
1954 where
1956 - `mean` is the average of all values in `x`
1957 - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to
1958 protect against division by 0 when handling uniform images
1959 - `N` is the number of elements in `x`
1960 - `stddev` is the standard deviation of all values in `x`
1962 Example Usage:
1964 >>> image = tf.constant(np.arange(1, 13, dtype=np.int32), shape=[2, 2, 3])
1965 >>> image # 3-D tensor
1966 <tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy=
1967 array([[[ 1, 2, 3],
1968 [ 4, 5, 6]],
1969 [[ 7, 8, 9],
1970 [10, 11, 12]]], dtype=int32)>
1971 >>> new_image = tf.image.per_image_standardization(image)
1972 >>> new_image # 3-D tensor with mean ~= 0 and variance ~= 1
1973 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
1974 array([[[-1.593255 , -1.3035723 , -1.0138896 ],
1975 [-0.7242068 , -0.4345241 , -0.14484136]],
1976 [[ 0.14484136, 0.4345241 , 0.7242068 ],
1977 [ 1.0138896 , 1.3035723 , 1.593255 ]]], dtype=float32)>
1979 Args:
1980 image: An n-D `Tensor` with at least 3 dimensions, the last 3 of which are
1981 the dimensions of each image.
1983 Returns:
1984 A `Tensor` with the same shape as `image` and its dtype is `float32`.
1986 Raises:
1987 ValueError: The shape of `image` has fewer than 3 dimensions.
1988 """
1989 with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
1990 image = ops.convert_to_tensor(image, name='image')
1991 image = _AssertAtLeast3DImage(image)
1993 image = math_ops.cast(image, dtype=dtypes.float32)
1994 num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:])
1995 image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)
1997 # Apply a minimum normalization that protects us against uniform images.
1998 stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True)
1999 min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
2000 adjusted_stddev = math_ops.maximum(stddev, min_stddev)
2002 image -= image_mean
2003 image = math_ops.divide(image, adjusted_stddev, name=scope)
2004 return image
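# Illustrative sketch (not part of the original module): a NumPy cross-check
# of the formula documented above, `(x - mean) / max(stddev, 1/sqrt(N))`.
# The helper name `_check_per_image_standardization` is hypothetical.
def _check_per_image_standardization():
  import numpy as np
  import tensorflow as tf
  x = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
  y = tf.image.per_image_standardization(x).numpy()
  adjusted_stddev = max(x.std(), 1.0 / np.sqrt(x.size))
  np.testing.assert_allclose(y, (x - x.mean()) / adjusted_stddev, rtol=1e-5)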
2007@tf_export('image.random_brightness')
2008@dispatch.register_unary_elementwise_api
2009@dispatch.add_dispatch_support
2010def random_brightness(image, max_delta, seed=None):
2011 """Adjust the brightness of images by a random factor.
2013 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
2014 interval `[-max_delta, max_delta)`.
2016 For producing deterministic results given a `seed` value, use
2017 `tf.image.stateless_random_brightness`. Unlike using the `seed` param
2018 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2019 same results given the same seed independent of how many times the function is
2020 called, and independent of global seed settings (e.g. tf.random.set_seed).
2022 Args:
2023 image: An image or images to adjust.
2024 max_delta: float, must be non-negative.
2025 seed: A Python integer. Used to create a random seed. See
2026 `tf.compat.v1.set_random_seed` for behavior.
2028 Usage Example:
2030 >>> x = [[[1.0, 2.0, 3.0],
2031 ... [4.0, 5.0, 6.0]],
2032 ... [[7.0, 8.0, 9.0],
2033 ... [10.0, 11.0, 12.0]]]
2034 >>> tf.image.random_brightness(x, 0.2)
2035 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2037 Returns:
2038 The brightness-adjusted image(s).
2040 Raises:
2041 ValueError: if `max_delta` is negative.
2042 """
2043 if max_delta < 0:
2044 raise ValueError('max_delta must be non-negative.')
2046 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
2047 return adjust_brightness(image, delta)
2050@tf_export('image.stateless_random_brightness', v1=[])
2051@dispatch.register_unary_elementwise_api
2052@dispatch.add_dispatch_support
2053def stateless_random_brightness(image, max_delta, seed):
2054 """Adjust the brightness of images by a random factor deterministically.
2056 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
2057 interval `[-max_delta, max_delta)`.
2059 Guarantees the same results given the same `seed` independent of how many
2060 times the function is called, and independent of global seed settings (e.g.
2061 `tf.random.set_seed`).
2063 Usage Example:
2065 >>> x = [[[1.0, 2.0, 3.0],
2066 ... [4.0, 5.0, 6.0]],
2067 ... [[7.0, 8.0, 9.0],
2068 ... [10.0, 11.0, 12.0]]]
2069 >>> seed = (1, 2)
2070 >>> tf.image.stateless_random_brightness(x, 0.2, seed)
2071 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2072 array([[[ 1.1376241, 2.1376243, 3.1376243],
2073 [ 4.1376243, 5.1376243, 6.1376243]],
2074 [[ 7.1376243, 8.137624 , 9.137624 ],
2075 [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)>
2077 Args:
2078 image: An image or images to adjust.
2079 max_delta: float, must be non-negative.
2080 seed: A shape [2] Tensor, the seed to the random number generator. Must have
2081 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2083 Returns:
2084 The brightness-adjusted image(s).
2086 Raises:
2087 ValueError: if `max_delta` is negative.
2088 """
2089 if max_delta < 0:
2090 raise ValueError('max_delta must be non-negative.')
2092 delta = stateless_random_ops.stateless_random_uniform(
2093 shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
2094 return adjust_brightness(image, delta)
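# Illustrative sketch (not part of the original module): the determinism
# guarantee described above, shown directly. Repeated calls with the same
# `seed` return identical tensors; the helper name is hypothetical.
def _check_stateless_brightness_determinism():
  import tensorflow as tf
  x = tf.ones([2, 2, 3])
  a = tf.image.stateless_random_brightness(x, 0.2, seed=(1, 2))
  b = tf.image.stateless_random_brightness(x, 0.2, seed=(1, 2))
  return bool(tf.reduce_all(a == b))  # True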
2097@tf_export('image.random_contrast')
2098@dispatch.add_dispatch_support
2099def random_contrast(image, lower, upper, seed=None):
2100 """Adjust the contrast of an image or images by a random factor.
2102 Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly
2103 picked in the interval `[lower, upper)`.
2105 For producing deterministic results given a `seed` value, use
2106 `tf.image.stateless_random_contrast`. Unlike using the `seed` param
2107 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2108 same results given the same seed independent of how many times the function is
2109 called, and independent of global seed settings (e.g. tf.random.set_seed).
2111 Args:
2112 image: An image tensor with 3 or more dimensions.
2113 lower: float. Lower bound for the random contrast factor.
2114 upper: float. Upper bound for the random contrast factor.
2115 seed: A Python integer. Used to create a random seed. See
2116 `tf.compat.v1.set_random_seed` for behavior.
2118 Usage Example:
2120 >>> x = [[[1.0, 2.0, 3.0],
2121 ... [4.0, 5.0, 6.0]],
2122 ... [[7.0, 8.0, 9.0],
2123 ... [10.0, 11.0, 12.0]]]
2124 >>> tf.image.random_contrast(x, 0.2, 0.5)
2125 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2127 Returns:
2128 The contrast-adjusted image(s).
2130 Raises:
2131 ValueError: if `upper <= lower` or if `lower < 0`.
2132 """
2133 if upper <= lower:
2134 raise ValueError('upper must be > lower.')
2136 if lower < 0:
2137 raise ValueError('lower must be non-negative.')
2139 contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed)
2140 return adjust_contrast(image, contrast_factor)
2143@tf_export('image.stateless_random_contrast', v1=[])
2144@dispatch.add_dispatch_support
2145def stateless_random_contrast(image, lower, upper, seed):
2146 """Adjust the contrast of images by a random factor deterministically.
2148 Guarantees the same results given the same `seed` independent of how many
2149 times the function is called, and independent of global seed settings (e.g.
2150 `tf.random.set_seed`).
2152 Args:
2153 image: An image tensor with 3 or more dimensions.
2154 lower: float. Lower bound for the random contrast factor.
2155 upper: float. Upper bound for the random contrast factor.
2156 seed: A shape [2] Tensor, the seed to the random number generator. Must have
2157 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2159 Usage Example:
2161 >>> x = [[[1.0, 2.0, 3.0],
2162 ... [4.0, 5.0, 6.0]],
2163 ... [[7.0, 8.0, 9.0],
2164 ... [10.0, 11.0, 12.0]]]
2165 >>> seed = (1, 2)
2166 >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed)
2167 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2168 array([[[3.4605184, 4.4605184, 5.4605184],
2169 [4.820173 , 5.820173 , 6.820173 ]],
2170 [[6.179827 , 7.179827 , 8.179828 ],
2171 [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)>
2173 Returns:
2174 The contrast-adjusted image(s).
2176 Raises:
2177 ValueError: if `upper <= lower` or if `lower < 0`.
2178 """
2179 if upper <= lower:
2180 raise ValueError('upper must be > lower.')
2182 if lower < 0:
2183 raise ValueError('lower must be non-negative.')
2185 contrast_factor = stateless_random_ops.stateless_random_uniform(
2186 shape=[], minval=lower, maxval=upper, seed=seed)
2187 return adjust_contrast(image, contrast_factor)
2190@tf_export('image.adjust_brightness')
2191@dispatch.register_unary_elementwise_api
2192@dispatch.add_dispatch_support
2193def adjust_brightness(image, delta):
2194 """Adjust the brightness of RGB or Grayscale images.
2196 This is a convenience method that converts RGB images to float
2197 representation, adjusts their brightness, and then converts them back to the
2198 original data type. If several adjustments are chained, it is advisable to
2199 minimize the number of redundant conversions.
2201 The value `delta` is added to all components of the tensor `image`. `image` is
2202 converted to `float` and scaled appropriately if it is in fixed-point
2203 representation, and `delta` is converted to the same data type. For regular
2204 images, `delta` should be in the range `(-1,1)`, as it is added to the image
2205 in floating point representation, where pixel values are in the `[0,1)` range.
2207 Usage Example:
2209 >>> x = [[[1.0, 2.0, 3.0],
2210 ... [4.0, 5.0, 6.0]],
2211 ... [[7.0, 8.0, 9.0],
2212 ... [10.0, 11.0, 12.0]]]
2213 >>> tf.image.adjust_brightness(x, delta=0.1)
2214 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2215 array([[[ 1.1, 2.1, 3.1],
2216 [ 4.1, 5.1, 6.1]],
2217 [[ 7.1, 8.1, 9.1],
2218 [10.1, 11.1, 12.1]]], dtype=float32)>
2220 Args:
2221 image: RGB image or images to adjust.
2222 delta: A scalar. Amount to add to the pixel values.
2224 Returns:
2225 A brightness-adjusted tensor of the same shape and type as `image`.
2226 """
2227 with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name:
2228 image = ops.convert_to_tensor(image, name='image')
2229 # Remember original dtype so we can convert back if needed
2230 orig_dtype = image.dtype
2232 if orig_dtype in [dtypes.float16, dtypes.float32]:
2233 flt_image = image
2234 else:
2235 flt_image = convert_image_dtype(image, dtypes.float32)
2237 adjusted = math_ops.add(
2238 flt_image, math_ops.cast(delta, flt_image.dtype), name=name)
2240 return convert_image_dtype(adjusted, orig_dtype, saturate=True)
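# Illustrative sketch (not part of the original module): the dtype round-trip
# described in the docstring above, on a uint8 image. Values are scaled to
# [0, 1) floats, shifted by `delta`, and saturate-cast back, so bright pixels
# clip at 255 rather than wrapping. The helper name is hypothetical.
def _example_adjust_brightness_uint8():
  import tensorflow as tf
  x = tf.constant([[[250, 0, 128]]], dtype=tf.uint8)
  return tf.image.adjust_brightness(x, 0.1)  # 250 saturates to 255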
2243@tf_export('image.adjust_contrast')
2244@dispatch.add_dispatch_support
2245def adjust_contrast(images, contrast_factor):
2246 """Adjust contrast of RGB or grayscale images.
2248 This is a convenience method that converts RGB images to float
2249 representation, adjusts their contrast, and then converts them back to the
2250 original data type. If several adjustments are chained, it is advisable to
2251 minimize the number of redundant conversions.
2253 `images` is a tensor of at least 3 dimensions. The last 3 dimensions are
2254 interpreted as `[height, width, channels]`. The other dimensions only
2255 represent a collection of images, such as `[batch, height, width, channels].`
2257 Contrast is adjusted independently for each channel of each image.
2259 For each channel, this Op computes the mean of the image pixels in the
2260 channel and then adjusts each component `x` of each pixel to
2261 `(x - mean) * contrast_factor + mean`.
2263 `contrast_factor` must be in the interval `(-inf, inf)`.
2265 Usage Example:
2267 >>> x = [[[1.0, 2.0, 3.0],
2268 ... [4.0, 5.0, 6.0]],
2269 ... [[7.0, 8.0, 9.0],
2270 ... [10.0, 11.0, 12.0]]]
2271 >>> tf.image.adjust_contrast(x, 2.)
2272 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2273 array([[[-3.5, -2.5, -1.5],
2274 [ 2.5, 3.5, 4.5]],
2275 [[ 8.5, 9.5, 10.5],
2276 [14.5, 15.5, 16.5]]], dtype=float32)>
2278 Args:
2279 images: Images to adjust. At least 3-D.
2280 contrast_factor: A float multiplier for adjusting contrast.
2282 Returns:
2283 The contrast-adjusted image or images.
2284 """
2285 with ops.name_scope(None, 'adjust_contrast',
2286 [images, contrast_factor]) as name:
2287 images = ops.convert_to_tensor(images, name='images')
2288 # Remember original dtype so we can convert back if needed
2289 orig_dtype = images.dtype
2291 if orig_dtype in (dtypes.float16, dtypes.float32):
2292 flt_images = images
2293 else:
2294 flt_images = convert_image_dtype(images, dtypes.float32)
2296 adjusted = gen_image_ops.adjust_contrastv2(
2297 flt_images, contrast_factor=contrast_factor, name=name)
2299 return convert_image_dtype(adjusted, orig_dtype, saturate=True)
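# Illustrative sketch (not part of the original module): a NumPy cross-check
# of the per-channel formula documented above,
# `(x - mean) * contrast_factor + mean`. The helper name is hypothetical.
def _check_adjust_contrast_formula():
  import numpy as np
  import tensorflow as tf
  x = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
  y = tf.image.adjust_contrast(x, 2.0).numpy()
  mean = x.mean(axis=(0, 1), keepdims=True)  # one mean per channel
  np.testing.assert_allclose(y, (x - mean) * 2.0 + mean, rtol=1e-5)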
2302@tf_export('image.adjust_gamma')
2303@dispatch.register_unary_elementwise_api
2304@dispatch.add_dispatch_support
2305def adjust_gamma(image, gamma=1, gain=1):
2306 """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction).
2308 on the input image.
2310 Also known as Power Law Transform. This function converts the
2311 input images first to float representation, then transforms them
2312 pixelwise according to the equation `Out = gain * In**gamma`,
2313 and then converts them back to the original data type.
2315 Usage Example:
2317 >>> x = [[[1.0, 2.0, 3.0],
2318 ... [4.0, 5.0, 6.0]],
2319 ... [[7.0, 8.0, 9.0],
2320 ... [10.0, 11.0, 12.0]]]
2321 >>> tf.image.adjust_gamma(x, 0.2)
2322 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2323 array([[[1. , 1.1486983, 1.2457309],
2324 [1.319508 , 1.3797297, 1.4309691]],
2325 [[1.4757731, 1.5157166, 1.5518456],
2326 [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)>
2328 Args:
2329 image: RGB image or images to adjust.
2330 gamma: A scalar or tensor. Non-negative real number.
2331 gain: A scalar or tensor. The constant multiplier.
2333 Returns:
2334 A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`.
2336 Raises:
2337 ValueError: If gamma is negative.
2338 Notes:
2339 For gamma greater than 1, the histogram will shift towards the left and
2340 the output image will be darker than the input image.
2341 For gamma less than 1, the histogram will shift towards the right and
2342 the output image will be brighter than the input image.
2343 References:
2344 [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction)
2345 """
2347 with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name:
2348 image = ops.convert_to_tensor(image, name='image')
2349 # Remember original dtype so we can convert back if needed
2350 orig_dtype = image.dtype
2352 if orig_dtype in [dtypes.float16, dtypes.float32]:
2353 flt_image = image
2354 else:
2355 flt_image = convert_image_dtype(image, dtypes.float32)
2357 assert_op = _assert(gamma >= 0, ValueError,
2358 'Gamma should be a non-negative real number.')
2359 if assert_op:
2360 gamma = control_flow_ops.with_dependencies(assert_op, gamma)
2362 # According to the definition of gamma correction.
2363 adjusted_img = gain * flt_image**gamma
2365 return convert_image_dtype(adjusted_img, orig_dtype, saturate=True)
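# Illustrative sketch (not part of the original module): the
# `Out = gain * In**gamma` transform above on float inputs in [0, 1], where
# gamma > 1 darkens the image as the Notes section says. The helper name is
# hypothetical.
def _example_adjust_gamma_darken():
  import tensorflow as tf
  x = tf.constant([[[0.25, 0.5, 0.75]]], dtype=tf.float32)
  return tf.image.adjust_gamma(x, gamma=2.0)  # [[[0.0625, 0.25, 0.5625]]]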
2368@tf_export('image.convert_image_dtype')
2369@dispatch.register_unary_elementwise_api
2370@dispatch.add_dispatch_support
2371def convert_image_dtype(image, dtype, saturate=False, name=None):
2372 """Convert `image` to `dtype`, scaling its values if needed.
2374 The operation supports data types (for `image` and `dtype`) of
2375 `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`,
2376 `float16`, `float32`, `float64`, `bfloat16`.
2378 Images that are represented using floating point values are expected to have
2379 values in the range `[0,1)`. Image data stored in integer data types are
2380 expected to have values in the range `[0,MAX]`, where `MAX` is the largest
2381 positive representable number for the data type.
2383 This op converts between data types, scaling the values appropriately before
2384 casting.
2386 Usage Example:
2388 >>> x = [[[1, 2, 3], [4, 5, 6]],
2389 ... [[7, 8, 9], [10, 11, 12]]]
2390 >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8)
2391 >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False)
2392 <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy=
2393 array([[[0.00787, 0.01575, 0.02362],
2394 [0.0315 , 0.03937, 0.04724]],
2395 [[0.0551 , 0.063 , 0.07086],
2396 [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)>
2398 Converting integer types to floating point types returns normalized floating
2399 point values in the range [0, 1); the values are normalized by the `MAX` value
2400 of the input dtype. Consider the following two examples:
2402 >>> a = [[[1], [2]], [[3], [4]]]
2403 >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8)
2404 >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32)
2405 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2406 array([[[0.00787402],
2407 [0.01574803]],
2408 [[0.02362205],
2409 [0.03149606]]], dtype=float32)>
2411 >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32)
2412 >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32)
2413 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2414 array([[[4.6566129e-10],
2415 [9.3132257e-10]],
2416 [[1.3969839e-09],
2417 [1.8626451e-09]]], dtype=float32)>
2419 Despite having identical values of `a` and output dtype of `float32`, the
2420 outputs differ due to the different input dtypes (`int8` vs. `int32`). This
2421 is, again, because the values are normalized by the `MAX` value of the input
2422 dtype.
2424 Note that converting floating point values to integer type may lose precision.
2425 In the example below, an image tensor `b` of dtype `float32` is converted to
2426 `int8` and back to `float32`. The final output, however, is different from
2427 the original input `b` due to precision loss.
2429 >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]]
2430 >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32)
2431 >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8)
2432 >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32)
2433 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2434 array([[[0.11811024],
2435 [0.33858266]],
2436 [[0.5590551 ],
2437 [0.77952754]]], dtype=float32)>
2439 Scaling up from an integer type (input dtype) to another integer type (output
2440 dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting
2441 back and forth should result in no change. For example, as shown below, the
2442 `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767)
2443 but, when scaled back, we get the same, original values of `c`.
2445 >>> c = [[[1], [2]], [[127], [127]]]
2446 >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8)
2447 >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16)
2448 >>> print(c_int16)
2449 tf.Tensor(
2450 [[[ 256]
2451 [ 512]]
2452 [[32512]
2453 [32512]]], shape=(2, 2, 1), dtype=int16)
2454 >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8)
2455 >>> print(c_int8_back)
2456 tf.Tensor(
2457 [[[ 1]
2458 [ 2]]
2459 [[127]
2460 [127]]], shape=(2, 2, 1), dtype=int8)
2462 Scaling down from an integer type to another integer type can be a lossy
2463 conversion. Notice in the example below that converting `int16` to `uint8` and
2464 back to `int16` has lost precision.
2466 >>> d = [[[1000], [2000]], [[3000], [4000]]]
2467 >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16)
2468 >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8)
2469 >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16)
2470 >>> print(d_int16_back)
2471 tf.Tensor(
2472 [[[ 896]
2473 [1920]]
2474 [[2944]
2475 [3968]]], shape=(2, 2, 1), dtype=int16)
2477 Note that converting from floating point inputs to integer types may lead to
2478 over/underflow problems. Set `saturate` to `True` to avoid such problems in
2479 problematic conversions. If enabled, saturation will clip the output into the
2480 allowed range before performing a potentially dangerous cast (and only before
2481 performing such a cast, i.e., when casting from a floating point to an integer
2482 type, and when casting from a signed to an unsigned type; `saturate` has no
2483 effect on casts between floats, or on casts that increase the type's range).
2485 Args:
2486 image: An image.
2487 dtype: A `DType` to convert `image` to.
2488 saturate: If `True`, clip the input before casting (if necessary).
2489 name: A name for this operation (optional).
2491 Returns:
2492 `image`, converted to `dtype`.
2494 Raises:
2495 AttributeError: Raises an attribute error when dtype is neither
2496 float nor integer.
2497 """
2498 image = ops.convert_to_tensor(image, name='image')
2499 dtype = dtypes.as_dtype(dtype)
2500 if not dtype.is_floating and not dtype.is_integer:
2501 raise AttributeError('dtype must be either floating point or integer')
2502 if not image.dtype.is_floating and not image.dtype.is_integer:
2503 raise AttributeError('image dtype must be either floating point or integer')
2504 if dtype == image.dtype:
2505 return array_ops.identity(image, name=name)
2507 with ops.name_scope(name, 'convert_image', [image]) as name:
2508 # Both integer: use integer multiplication in the larger range
2509 if image.dtype.is_integer and dtype.is_integer:
2510 scale_in = image.dtype.max
2511 scale_out = dtype.max
2512 if scale_in > scale_out:
2513 # Scaling down, scale first, then cast. The scaling factor will
2514 # cause in.max to be mapped to above out.max but below out.max+1,
2515 # so that the output is safely in the supported range.
2516 scale = (scale_in + 1) // (scale_out + 1)
2517 scaled = math_ops.floordiv(image, scale)
2519 if saturate:
2520 return math_ops.saturate_cast(scaled, dtype, name=name)
2521 else:
2522 return math_ops.cast(scaled, dtype, name=name)
2523 else:
2524 # Scaling up, cast first, then scale. The scale will not map in.max to
2525 # out.max, but converting back and forth should result in no change.
2526 if saturate:
2527 cast = math_ops.saturate_cast(image, dtype)
2528 else:
2529 cast = math_ops.cast(image, dtype)
2530 scale = (scale_out + 1) // (scale_in + 1)
2531 return math_ops.multiply(cast, scale, name=name)
2532 elif image.dtype.is_floating and dtype.is_floating:
2533 # Both float: Just cast, no possible overflows in the allowed ranges.
2534 # Note: We're ignoring float overflows. If your image dynamic range
2535 # exceeds float range, you're on your own.
2536 return math_ops.cast(image, dtype, name=name)
2537 else:
2538 if image.dtype.is_integer:
2539 # Converting to float: first cast, then scale. No saturation possible.
2540 cast = math_ops.cast(image, dtype)
2541 scale = 1. / image.dtype.max
2542 return math_ops.multiply(cast, scale, name=name)
2543 else:
2544 # Converting from float: first scale, then cast
2545 scale = dtype.max + 0.5 # avoid rounding problems in the cast
2546 scaled = math_ops.multiply(image, scale)
2547 if saturate:
2548 return math_ops.saturate_cast(scaled, dtype, name=name)
2549 else:
2550 return math_ops.cast(scaled, dtype, name=name)
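# Illustrative sketch (not part of the original module): the integer-to-
# integer scaling rule above, spelled out for int8 -> int16. Scaling up
# multiplies by (out.max + 1) // (in.max + 1) = 32768 // 128 = 256, so
# in.max (127) maps to 32512, not out.max. The helper name is hypothetical.
def _check_int_to_int_scale():
  import tensorflow as tf
  x = tf.constant([[[1], [127]]], dtype=tf.int8)
  return tf.image.convert_image_dtype(x, tf.int16)  # [[[256], [32512]]]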
2553@tf_export('image.rgb_to_grayscale')
2554@dispatch.add_dispatch_support
2555def rgb_to_grayscale(images, name=None):
2556 """Converts one or more images from RGB to Grayscale.
2558 Outputs a tensor of the same `DType` and rank as `images`. The size of the
2559 last dimension of the output is 1, containing the Grayscale value of the
2560 pixels.
2562 >>> original = tf.constant([[[1.0, 2.0, 3.0]]])
2563 >>> converted = tf.image.rgb_to_grayscale(original)
2564 >>> print(converted.numpy())
2565 [[[1.81...]]]
2567 Args:
2568 images: The RGB tensor to convert. The last dimension must have size 3 and
2569 should contain RGB values.
2570 name: A name for the operation (optional).
2572 Returns:
2573 The converted grayscale image(s).
2574 """
2575 with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name:
2576 images = ops.convert_to_tensor(images, name='images')
2577 # Remember original dtype so we can convert back if needed
2578 orig_dtype = images.dtype
2579 flt_image = convert_image_dtype(images, dtypes.float32)
2581 # Reference for converting between RGB and grayscale.
2582 # https://en.wikipedia.org/wiki/Luma_%28video%29
2583 rgb_weights = [0.2989, 0.5870, 0.1140]
2584 gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1])
2585 gray_float = array_ops.expand_dims(gray_float, -1)
2586 return convert_image_dtype(gray_float, orig_dtype, name=name)
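# Illustrative sketch (not part of the original module): a NumPy cross-check
# of the luma weights used above (0.2989 R + 0.5870 G + 0.1140 B). The
# helper name is hypothetical.
def _check_rgb_to_grayscale_weights():
  import numpy as np
  import tensorflow as tf
  rgb = np.array([[[0.2, 0.4, 0.6]]], dtype=np.float32)
  gray = tf.image.rgb_to_grayscale(rgb).numpy()
  expected = 0.2989 * 0.2 + 0.5870 * 0.4 + 0.1140 * 0.6
  np.testing.assert_allclose(gray, [[[expected]]], rtol=1e-5)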
2589@tf_export('image.grayscale_to_rgb')
2590@dispatch.add_dispatch_support
2591def grayscale_to_rgb(images, name=None):
2592 """Converts one or more images from Grayscale to RGB.
2594 Outputs a tensor of the same `DType` and rank as `images`. The size of the
2595 last dimension of the output is 3, containing the RGB value of the pixels.
2596 The input images' last dimension must be size 1.
2598 >>> original = tf.constant([[[1.0], [2.0], [3.0]]])
2599 >>> converted = tf.image.grayscale_to_rgb(original)
2600 >>> print(converted.numpy())
2601 [[[1. 1. 1.]
2602 [2. 2. 2.]
2603 [3. 3. 3.]]]
2605 Args:
2606 images: The Grayscale tensor to convert. The last dimension must be size 1.
2607 name: A name for the operation (optional).
2609 Returns:
2610 The converted RGB image(s).
2611 """
2612 with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name:
2613 images = _AssertGrayscaleImage(images)
2615 images = ops.convert_to_tensor(images, name='images')
2616 rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
2617 shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] +
2618 [array_ops.expand_dims(3, 0)])
2619 multiples = array_ops.concat(shape_list, 0)
2620 rgb = array_ops.tile(images, multiples, name=name)
2621 rgb.set_shape(images.get_shape()[:-1].concatenate([3]))
2622 return rgb
2625# pylint: disable=invalid-name
2626@tf_export('image.random_hue')
2627@dispatch.add_dispatch_support
2628def random_hue(image, max_delta, seed=None):
2629 """Adjust the hue of RGB images by a random factor.
2631 Equivalent to `adjust_hue()` but uses a `delta` randomly
2632 picked in the interval `[-max_delta, max_delta)`.
2634 `max_delta` must be in the interval `[0, 0.5]`.
2636 Usage Example:
2638 >>> x = [[[1.0, 2.0, 3.0],
2639 ... [4.0, 5.0, 6.0]],
2640 ... [[7.0, 8.0, 9.0],
2641 ... [10.0, 11.0, 12.0]]]
2642 >>> tf.image.random_hue(x, 0.2)
2643 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2645 For producing deterministic results given a `seed` value, use
2646 `tf.image.stateless_random_hue`. Unlike using the `seed` param with
2647 `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same
2648 results given the same seed independent of how many times the function is
2649 called, and independent of global seed settings (e.g. tf.random.set_seed).
2651 Args:
2652 image: RGB image or images. The size of the last dimension must be 3.
2653 max_delta: float. The maximum value for the random delta.
2654 seed: An operation-specific seed. It will be used in conjunction with the
2655 graph-level seed to determine the real seeds that will be used in this
2656 operation. Please see the documentation of set_random_seed for its
2657 interaction with the graph-level random seed.
2659 Returns:
2660 Adjusted image(s), same shape and DType as `image`.
2662 Raises:
2663 ValueError: if `max_delta` is invalid.
2664 """
2665 if max_delta > 0.5:
2666 raise ValueError('max_delta must be <= 0.5.')
2668 if max_delta < 0:
2669 raise ValueError('max_delta must be non-negative.')
2671 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
2672 return adjust_hue(image, delta)
2675@tf_export('image.stateless_random_hue', v1=[])
2676@dispatch.add_dispatch_support
2677def stateless_random_hue(image, max_delta, seed):
2678 """Adjust the hue of RGB images by a random factor deterministically.
2680 Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the
2681 interval `[-max_delta, max_delta)`.
2683 Guarantees the same results given the same `seed` independent of how many
2684 times the function is called, and independent of global seed settings (e.g.
2685 `tf.random.set_seed`).
2687 `max_delta` must be in the interval `[0, 0.5]`.
2689 Usage Example:
2691 >>> x = [[[1.0, 2.0, 3.0],
2692 ... [4.0, 5.0, 6.0]],
2693 ... [[7.0, 8.0, 9.0],
2694 ... [10.0, 11.0, 12.0]]]
2695 >>> seed = (1, 2)
2696 >>> tf.image.stateless_random_hue(x, 0.2, seed)
2697 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2698 array([[[ 1.6514902, 1. , 3. ],
2699 [ 4.65149 , 4. , 6. ]],
2700 [[ 7.65149 , 7. , 9. ],
2701 [10.65149 , 10. , 12. ]]], dtype=float32)>
2703 Args:
2704 image: RGB image or images. The size of the last dimension must be 3.
2705 max_delta: float. The maximum value for the random delta.
2706 seed: A shape [2] Tensor, the seed to the random number generator. Must have
2707 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2709 Returns:
2710 Adjusted image(s), same shape and DType as `image`.
2712 Raises:
2713 ValueError: if `max_delta` is invalid.
2714 """
2715 if max_delta > 0.5:
2716 raise ValueError('max_delta must be <= 0.5.')
2718 if max_delta < 0:
2719 raise ValueError('max_delta must be non-negative.')
2721 delta = stateless_random_ops.stateless_random_uniform(
2722 shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
2723 return adjust_hue(image, delta)
2726@tf_export('image.adjust_hue')
2727@dispatch.add_dispatch_support
2728def adjust_hue(image, delta, name=None):
2729 """Adjust hue of RGB images.
2731 This is a convenience method that converts an RGB image to float
2732 representation, converts it to HSV, adds an offset to the
2733 hue channel, converts back to RGB and then back to the original
2734 data type. If several adjustments are chained it is advisable to minimize
2735 the number of redundant conversions.
2737 `image` is an RGB image. The image hue is adjusted by converting the
2738 image(s) to HSV and rotating the hue channel (H) by
2739 `delta`. The image is then converted back to RGB.
2741 `delta` must be in the interval `[-1, 1]`.
2743 Usage Example:
2745 >>> x = [[[1.0, 2.0, 3.0],
2746 ... [4.0, 5.0, 6.0]],
2747 ... [[7.0, 8.0, 9.0],
2748 ... [10.0, 11.0, 12.0]]]
2749 >>> tf.image.adjust_hue(x, 0.2)
2750 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2751 array([[[ 2.3999996, 1. , 3. ],
2752 [ 5.3999996, 4. , 6. ]],
2753 [[ 8.4 , 7. , 9. ],
2754 [11.4 , 10. , 12. ]]], dtype=float32)>
2756 Args:
2757 image: RGB image or images. The size of the last dimension must be 3.
2758 delta: float. How much to add to the hue channel.
2759 name: A name for this operation (optional).
2761 Returns:
2762 Adjusted image(s), same shape and DType as `image`.
2764 Raises:
2765 InvalidArgumentError: image must have at least 3 dimensions.
2766 InvalidArgumentError: The size of the last dimension must be 3.
2767 ValueError: if `delta` is not in the interval of `[-1, 1]`.
2769 Usage Example:
2771 >>> image = [[[1, 2, 3], [4, 5, 6]],
2772 ... [[7, 8, 9], [10, 11, 12]],
2773 ... [[13, 14, 15], [16, 17, 18]]]
2774 >>> image = tf.constant(image)
2775 >>> tf.image.adjust_hue(image, 0.2)
2776 <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
2777 array([[[ 2, 1, 3],
2778 [ 5, 4, 6]],
2779 [[ 8, 7, 9],
2780 [11, 10, 12]],
2781 [[14, 13, 15],
2782 [17, 16, 18]]], dtype=int32)>
2783 """
2784 with ops.name_scope(name, 'adjust_hue', [image]) as name:
2785 if context.executing_eagerly():
2786 if delta < -1 or delta > 1:
2787 raise ValueError('delta must be in the interval [-1, 1]')
2788 image = ops.convert_to_tensor(image, name='image')
2789 # Remember original dtype so we can convert back if needed
2790 orig_dtype = image.dtype
2791 if orig_dtype in (dtypes.float16, dtypes.float32):
2792 flt_image = image
2793 else:
2794 flt_image = convert_image_dtype(image, dtypes.float32)
2796 rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
2798 return convert_image_dtype(rgb_altered, orig_dtype)
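# Illustrative sketch (not part of the original module): a consequence of the
# RGB -> HSV -> RGB pipeline above. A pixel with R == G == B has zero
# saturation, so rotating its hue channel is a no-op. The helper name is
# hypothetical.
def _example_adjust_hue_gray_pixel():
  import tensorflow as tf
  gray = tf.constant([[[0.5, 0.5, 0.5]]], dtype=tf.float32)
  return tf.image.adjust_hue(gray, 0.3)  # still [[[0.5, 0.5, 0.5]]]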
2801# pylint: disable=invalid-name
2802@tf_export('image.random_jpeg_quality')
2803@dispatch.add_dispatch_support
2804def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
2805 """Randomly changes jpeg encoding quality for inducing jpeg noise.
2807 `min_jpeg_quality` must be in the interval `[0, 100]` and less than
2808 `max_jpeg_quality`.
2809 `max_jpeg_quality` must be in the interval `[0, 100]`.
2811 Usage Example:
2813 >>> x = tf.constant([[[1, 2, 3],
2814 ... [4, 5, 6]],
2815 ... [[7, 8, 9],
2816 ... [10, 11, 12]]], dtype=tf.uint8)
2817 >>> tf.image.random_jpeg_quality(x, 75, 95)
2818 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=...>
2820 For producing deterministic results given a `seed` value, use
2821 `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param
2822 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2823 same results given the same seed independent of how many times the function is
2824 called, and independent of global seed settings (e.g. tf.random.set_seed).
2826 Args:
2827 image: 3D image. Size of the last dimension must be 1 or 3.
2828 min_jpeg_quality: Minimum jpeg encoding quality to use.
2829 max_jpeg_quality: Maximum jpeg encoding quality to use.
2830 seed: An operation-specific seed. It will be used in conjunction with the
2831 graph-level seed to determine the real seeds that will be used in this
2832 operation. Please see the documentation of set_random_seed for its
2833 interaction with the graph-level random seed.
2835 Returns:
2836 Adjusted image(s), same shape and DType as `image`.
2838 Raises:
2839 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
2840 """
2841 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
2842 max_jpeg_quality > 100):
2843 raise ValueError('jpeg encoding range must be between 0 and 100.')
2845 if min_jpeg_quality >= max_jpeg_quality:
2846 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')
2848 jpeg_quality = random_ops.random_uniform([],
2849 min_jpeg_quality,
2850 max_jpeg_quality,
2851 seed=seed,
2852 dtype=dtypes.int32)
2853 return adjust_jpeg_quality(image, jpeg_quality)
2856@tf_export('image.stateless_random_jpeg_quality', v1=[])
2857@dispatch.add_dispatch_support
2858def stateless_random_jpeg_quality(image,
2859 min_jpeg_quality,
2860 max_jpeg_quality,
2861 seed):
2862 """Deterministically radomize jpeg encoding quality for inducing jpeg noise.
2864 Guarantees the same results given the same `seed` independent of how many
2865 times the function is called, and independent of global seed settings (e.g.
2866 `tf.random.set_seed`).
2868 `min_jpeg_quality` must be in the interval `[0, 100]` and less than
2869 `max_jpeg_quality`.
2870 `max_jpeg_quality` must be in the interval `[0, 100]`.
2872 Usage Example:
2874 >>> x = tf.constant([[[1, 2, 3],
2875 ... [4, 5, 6]],
2876 ... [[7, 8, 9],
2877 ... [10, 11, 12]]], dtype=tf.uint8)
2878 >>> seed = (1, 2)
2879 >>> tf.image.stateless_random_jpeg_quality(x, 75, 95, seed)
2880 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
2881 array([[[ 0, 4, 5],
2882 [ 1, 5, 6]],
2883 [[ 5, 9, 10],
2884 [ 5, 9, 10]]], dtype=uint8)>
2886 Args:
2887 image: 3D image. Size of the last dimension must be 1 or 3.
2888 min_jpeg_quality: Minimum jpeg encoding quality to use.
2889 max_jpeg_quality: Maximum jpeg encoding quality to use.
2890 seed: A shape [2] Tensor, the seed to the random number generator. Must have
2891 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2893 Returns:
2894 Adjusted image(s), same shape and DType as `image`.
2896 Raises:
2897 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
2898 """
2899 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
2900 max_jpeg_quality > 100):
2901 raise ValueError('jpeg encoding range must be between 0 and 100.')
2903 if min_jpeg_quality >= max_jpeg_quality:
2904 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')
2906 jpeg_quality = stateless_random_ops.stateless_random_uniform(
2907 shape=[], minval=min_jpeg_quality, maxval=max_jpeg_quality, seed=seed,
2908 dtype=dtypes.int32)
2909 return adjust_jpeg_quality(image, jpeg_quality)
2912@tf_export('image.adjust_jpeg_quality')
2913@dispatch.add_dispatch_support
2914def adjust_jpeg_quality(image, jpeg_quality, name=None):
2915 """Adjust jpeg encoding quality of an image.
2917 This is a convenience method that converts an image to uint8 representation,
2918 encodes it to jpeg with `jpeg_quality`, decodes it, and then converts back
2919 to the original data type.
2921 `jpeg_quality` must be in the interval `[0, 100]`.
2923 Usage Examples:
2925 >>> x = [[[0.01, 0.02, 0.03],
2926 ... [0.04, 0.05, 0.06]],
2927 ... [[0.07, 0.08, 0.09],
2928 ... [0.10, 0.11, 0.12]]]
2929 >>> x_jpeg = tf.image.adjust_jpeg_quality(x, 75)
2930 >>> x_jpeg.numpy()
2931 array([[[0.00392157, 0.01960784, 0.03137255],
2932 [0.02745098, 0.04313726, 0.05490196]],
2933 [[0.05882353, 0.07450981, 0.08627451],
2934 [0.08235294, 0.09803922, 0.10980393]]], dtype=float32)
2936 Note that floating point values are expected to have values in the range
2937 [0,1) and values outside this range are clipped.
2939 >>> x = [[[1.0, 2.0, 3.0],
2940 ... [4.0, 5.0, 6.0]],
2941 ... [[7.0, 8.0, 9.0],
2942 ... [10.0, 11.0, 12.0]]]
2943 >>> tf.image.adjust_jpeg_quality(x, 75)
2944 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2945 array([[[1., 1., 1.],
2946 [1., 1., 1.]],
2947 [[1., 1., 1.],
2948 [1., 1., 1.]]], dtype=float32)>
2950 Note that `jpeg_quality` 100 is still lossy compression.
2952 >>> x = tf.constant([[[1, 2, 3],
2953 ... [4, 5, 6]],
2954 ... [[7, 8, 9],
2955 ... [10, 11, 12]]], dtype=tf.uint8)
2956 >>> tf.image.adjust_jpeg_quality(x, 100)
2957 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
2958 array([[[ 0, 1, 3],
2959 [ 3, 4, 6]],
2960 [[ 6, 7, 9],
2961 [ 9, 10, 12]]], dtype=uint8)>
2963 Args:
2964 image: 3D image. The size of the last dimension must be None, 1 or 3.
2965 jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality.
2966 name: A name for this operation (optional).
2968 Returns:
2969 Adjusted image, same shape and DType as `image`.
2971 Raises:
2972 InvalidArgumentError: quality must be in [0,100]
2973 InvalidArgumentError: image must have 1 or 3 channels
2974 """
2975 with ops.name_scope(name, 'adjust_jpeg_quality', [image]):
2976 image = ops.convert_to_tensor(image, name='image')
2977 channels = image.shape.as_list()[-1]
2978 # Remember original dtype so we can convert back if needed
2979 orig_dtype = image.dtype
2980 image = convert_image_dtype(image, dtypes.uint8, saturate=True)
2981 if not _is_tensor(jpeg_quality):
2982 # If jpeg_quality is an int (not a tensor).
2983 jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32)
2984 image = gen_image_ops.encode_jpeg_variable_quality(image, jpeg_quality)
2986 image = gen_image_ops.decode_jpeg(image, channels=channels)
2987 return convert_image_dtype(image, orig_dtype, saturate=True)
2990@tf_export('image.random_saturation')
2991@dispatch.add_dispatch_support
2992def random_saturation(image, lower, upper, seed=None):
2993 """Adjust the saturation of RGB images by a random factor.
2995 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
2996 picked in the interval `[lower, upper)`.
2998 Usage Example:
3000 >>> x = [[[1.0, 2.0, 3.0],
3001 ... [4.0, 5.0, 6.0]],
3002 ... [[7.0, 8.0, 9.0],
3003 ... [10.0, 11.0, 12.0]]]
3004 >>> tf.image.random_saturation(x, 5, 10)
3005 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3006 array([[[ 0. , 1.5, 3. ],
3007 [ 0. , 3. , 6. ]],
3008 [[ 0. , 4.5, 9. ],
3009 [ 0. , 6. , 12. ]]], dtype=float32)>
3011 For producing deterministic results given a `seed` value, use
3012 `tf.image.stateless_random_saturation`. Unlike using the `seed` param
3013 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
3014 same results given the same seed independent of how many times the function is
3015 called, and independent of global seed settings (e.g. tf.random.set_seed).
3017 Args:
3018 image: RGB image or images. The size of the last dimension must be 3.
3019 lower: float. Lower bound for the random saturation factor.
3020 upper: float. Upper bound for the random saturation factor.
3021 seed: An operation-specific seed. It will be used in conjunction with the
3022 graph-level seed to determine the real seeds that will be used in this
3023 operation. Please see the documentation of set_random_seed for its
3024 interaction with the graph-level random seed.
3026 Returns:
3027 Adjusted image(s), same shape and DType as `image`.
3029 Raises:
3030 ValueError: if `upper <= lower` or if `lower < 0`.
3031 """
3032 if upper <= lower:
3033 raise ValueError('upper must be > lower.')
3035 if lower < 0:
3036 raise ValueError('lower must be non-negative.')
3038 saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed)
3039 return adjust_saturation(image, saturation_factor)
3042@tf_export('image.stateless_random_saturation', v1=[])
3043@dispatch.add_dispatch_support
3044def stateless_random_saturation(image, lower, upper, seed=None):
3045 """Adjust the saturation of RGB images by a random factor deterministically.
3047 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
3048 picked in the interval `[lower, upper)`.
3050 Guarantees the same results given the same `seed` independent of how many
3051 times the function is called, and independent of global seed settings (e.g.
3052 `tf.random.set_seed`).
3054 Usage Example:
3056 >>> x = [[[1.0, 2.0, 3.0],
3057 ... [4.0, 5.0, 6.0]],
3058 ... [[7.0, 8.0, 9.0],
3059 ... [10.0, 11.0, 12.0]]]
3060 >>> seed = (1, 2)
3061 >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed)
3062 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3063 array([[[ 1.1559395, 2.0779698, 3. ],
3064 [ 4.1559396, 5.07797 , 6. ]],
3065 [[ 7.1559396, 8.07797 , 9. ],
3066 [10.155939 , 11.07797 , 12. ]]], dtype=float32)>
3068 Args:
3069 image: RGB image or images. The size of the last dimension must be 3.
3070 lower: float. Lower bound for the random saturation factor.
3071 upper: float. Upper bound for the random saturation factor.
3072 seed: A shape [2] Tensor, the seed to the random number generator. Must have
3073 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
3075 Returns:
3076 Adjusted image(s), same shape and DType as `image`.
3078 Raises:
3079 ValueError: if `upper <= lower` or if `lower < 0`.
3080 """
3081 if upper <= lower:
3082 raise ValueError('upper must be > lower.')
3084 if lower < 0:
3085 raise ValueError('lower must be non-negative.')
3087 saturation_factor = stateless_random_ops.stateless_random_uniform(
3088 shape=[], minval=lower, maxval=upper, seed=seed)
3089 return adjust_saturation(image, saturation_factor)
3092@tf_export('image.adjust_saturation')
3093@dispatch.add_dispatch_support
3094def adjust_saturation(image, saturation_factor, name=None):
3095 """Adjust saturation of RGB images.
3097 This is a convenience method that converts RGB images to float
3098 representation, converts them to HSV, adds an offset to the
3099 saturation channel, converts back to RGB and then back to the original
3100 data type. If several adjustments are chained it is advisable to minimize
3101 the number of redundant conversions.
3103 `image` is an RGB image or images. The image saturation is adjusted by
3104 converting the images to HSV and multiplying the saturation (S) channel by
3105 `saturation_factor` and clipping. The images are then converted back to RGB.
3107 `saturation_factor` must be in the interval `[0, inf)`.
3109 Usage Example:
3111 >>> x = [[[1.0, 2.0, 3.0],
3112 ... [4.0, 5.0, 6.0]],
3113 ... [[7.0, 8.0, 9.0],
3114 ... [10.0, 11.0, 12.0]]]
3115 >>> tf.image.adjust_saturation(x, 0.5)
3116 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3117 array([[[ 2. , 2.5, 3. ],
3118 [ 5. , 5.5, 6. ]],
3119 [[ 8. , 8.5, 9. ],
3120 [11. , 11.5, 12. ]]], dtype=float32)>
3122 Args:
3123 image: RGB image or images. The size of the last dimension must be 3.
3124 saturation_factor: float. Factor to multiply the saturation by.
3125 name: A name for this operation (optional).
3127 Returns:
3128 Adjusted image(s), same shape and DType as `image`.
3130 Raises:
3131 InvalidArgumentError: input must have 3 channels
3132 """
3133 with ops.name_scope(name, 'adjust_saturation', [image]) as name:
3134 image = ops.convert_to_tensor(image, name='image')
3135 # Remember original dtype so we can convert back if needed
3136 orig_dtype = image.dtype
3137 if orig_dtype in (dtypes.float16, dtypes.float32):
3138 flt_image = image
3139 else:
3140 flt_image = convert_image_dtype(image, dtypes.float32)
3142 adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor)
3144 return convert_image_dtype(adjusted, orig_dtype)
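

# Editor's note: a sketch (not part of the original module) showing that
# adjust_saturation matches manually scaling the S channel in HSV space, per
# the docstring above. Assumes TF 2.x eager execution; the helper name and
# the tolerance are ours.
def _example_adjust_saturation_matches_hsv_scaling():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  rgb = tf.random.uniform([4, 4, 3])
  direct = tf.image.adjust_saturation(rgb, 0.5)
  h, s, v = tf.unstack(tf.image.rgb_to_hsv(rgb), axis=-1)
  manual = tf.image.hsv_to_rgb(
      tf.stack([h, tf.clip_by_value(s * 0.5, 0.0, 1.0), v], axis=-1))
  # Both paths agree up to float rounding.
  tf.debugging.assert_near(direct, manual, atol=1e-3)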


@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg'])
def is_jpeg(contents, name=None):
  r"""Convenience function to check if the 'contents' encodes a JPEG image.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
    A scalar boolean tensor indicating if 'contents' may be a JPEG image.
    is_jpeg is susceptible to false positives.
  """
  # Normal JPEGs start with \xff\xd8\xff\xe0
  # JPEG with EXIF starts with \xff\xd8\xff\xe1
  # Use \xff\xd8\xff to cover both.
  with ops.name_scope(name, 'is_jpeg'):
    substr = string_ops.substr(contents, 0, 3)
    return math_ops.equal(substr, b'\xff\xd8\xff', name=name)


def _is_png(contents, name=None):
  r"""Convenience function to check if the 'contents' encodes a PNG image.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
    A scalar boolean tensor indicating if 'contents' may be a PNG image.
    is_png is susceptible to false positives.
  """
  with ops.name_scope(name, 'is_png'):
    substr = string_ops.substr(contents, 0, 3)
    return math_ops.equal(substr, b'\211PN', name=name)
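

# Editor's note: a small sketch (not part of the original module) exercising
# the magic-byte check above on freshly encoded images. Assumes TF 2.x eager
# execution; the helper name is ours.
def _example_magic_byte_checks():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  pixels = tf.zeros([4, 4, 3], dtype=tf.uint8)
  jpeg_bytes = tf.io.encode_jpeg(pixels)
  png_bytes = tf.io.encode_png(pixels)
  # JPEG data starts with \xff\xd8\xff; PNG data starts with \x89PN.
  assert bool(tf.io.is_jpeg(jpeg_bytes))
  assert not bool(tf.io.is_jpeg(png_bytes))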


tf_export(
    'io.decode_and_crop_jpeg',
    'image.decode_and_crop_jpeg',
    v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg))

tf_export(
    'io.decode_bmp',
    'image.decode_bmp',
    v1=['io.decode_bmp', 'image.decode_bmp'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_bmp))
tf_export(
    'io.decode_gif',
    'image.decode_gif',
    v1=['io.decode_gif', 'image.decode_gif'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_gif))
tf_export(
    'io.decode_jpeg',
    'image.decode_jpeg',
    v1=['io.decode_jpeg', 'image.decode_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_jpeg))
tf_export(
    'io.decode_png',
    'image.decode_png',
    v1=['io.decode_png', 'image.decode_png'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_png))

tf_export(
    'io.encode_jpeg',
    'image.encode_jpeg',
    v1=['io.encode_jpeg', 'image.encode_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.encode_jpeg))
tf_export(
    'io.extract_jpeg_shape',
    'image.extract_jpeg_shape',
    v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
        dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape))


@tf_export('io.encode_png', 'image.encode_png')
@dispatch.add_dispatch_support
def encode_png(image, compression=-1, name=None):
  r"""PNG-encode an image.

  `image` is a rank-N Tensor of type uint8 or uint16 with shape `batch_dims +
  [height, width, channels]`, where `channels` is:

  *   1: for grayscale.
  *   2: for grayscale + alpha.
  *   3: for RGB.
  *   4: for RGBA.

  The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
  default or a value from 0 to 9. 9 is the highest compression level,
  generating the smallest output, but is slower.

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`.
      Rank N >= 3 with shape `batch_dims + [height, width, channels]`.
    compression: An optional `int`. Defaults to `-1`. Compression level.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  return gen_image_ops.encode_png(
      ops.convert_to_tensor(image), compression, name)
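

# Editor's note: a round-trip sketch (not part of the original module) for
# encode_png, assuming TF 2.x eager execution; the helper name is ours.
def _example_png_roundtrip():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  image = tf.cast(
      tf.random.uniform([8, 8, 3], maxval=256, dtype=tf.int32), tf.uint8)
  encoded = tf.io.encode_png(image, compression=9)  # smallest output, slowest
  decoded = tf.io.decode_png(encoded, channels=3)
  # PNG is lossless, so the round trip is exact.
  tf.debugging.assert_equal(image, decoded)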


@tf_export(
    'io.decode_image',
    'image.decode_image',
    v1=['io.decode_image', 'image.decode_image'])
@dispatch.add_dispatch_support
def decode_image(contents,
                 channels=None,
                 dtype=dtypes.uint8,
                 name=None,
                 expand_animations=True):
  """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`.

  Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
  appropriate operation to convert the input bytes `string` into a `Tensor`
  of type `dtype`.

  Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
  opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
  arrays `[height, width, num_channels]`. Make sure to take this into account
  when constructing your graph if you are intermixing GIF files with BMP,
  JPEG, and/or PNG files. Alternately, set the `expand_animations` argument
  of this function to `False`, in which case the op will return 3-dimensional
  tensors and will truncate animated GIF files to the first frame.

  NOTE: If the first frame of an animated GIF does not occupy the entire
  canvas (maximum frame width x maximum frame height), then it fills the
  unoccupied areas (in the first frame) with zeros (black). For frames after
  the first that do not occupy the entire canvas, it uses the previous frame
  to fill the unoccupied areas.

  Args:
    contents: A `Tensor` of type `string`. 0-D. The encoded image bytes.
    channels: An optional `int`. Defaults to `None` (treated as `0`). Number
      of color channels for the decoded image.
    dtype: The desired DType of the returned `Tensor`.
    name: A name for the operation (optional)
    expand_animations: An optional `bool`. Defaults to `True`. Controls the
      shape of the returned op's output. If `True`, the returned op will
      produce a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for
      all GIFs, whether animated or not. If `False`, the returned op will
      produce a 3-D tensor for all file types and will truncate animated GIFs
      to the first frame.

  Returns:
    `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on
    the file type and the value of the `expand_animations` parameter.

  Raises:
    ValueError: On incorrect number of channels.
  """
  with ops.name_scope(name, 'decode_image'):
    channels = 0 if channels is None else channels
    if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]:
      dest_dtype = dtype
      dtype = dtypes.uint16
      return convert_image_dtype(
          gen_image_ops.decode_image(
              contents=contents,
              channels=channels,
              expand_animations=expand_animations,
              dtype=dtype), dest_dtype)
    else:
      return gen_image_ops.decode_image(
          contents=contents,
          channels=channels,
          expand_animations=expand_animations,
          dtype=dtype)
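

# Editor's note: a sketch (not part of the original module) of the dtype
# handling in decode_image above: a dtype outside {uint8, uint16, float32} is
# decoded as uint16 first and then converted. Assumes TF 2.x eager execution;
# the helper name is ours.
def _example_decode_image_other_dtype():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  contents = tf.io.encode_jpeg(tf.zeros([8, 8, 3], dtype=tf.uint8))
  image = tf.io.decode_image(contents, dtype=tf.float16)
  assert image.dtype == tf.float16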


@tf_export('image.total_variation')
@dispatch.add_dispatch_support
def total_variation(images, name=None):
  """Calculate and return the total variation for one or more images.

  The total variation is the sum of the absolute differences for neighboring
  pixel-values in the input images. This measures how much noise is in the
  images.

  This can be used as a loss-function during optimization so as to suppress
  noise in images. If you have a batch of images, then you should calculate
  the scalar loss-value as the sum:
  `loss = tf.reduce_sum(tf.image.total_variation(images))`

  This implements the anisotropic 2-D version of the formula described here:

  https://en.wikipedia.org/wiki/Total_variation_denoising

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D
      Tensor of shape `[height, width, channels]`.
    name: A name for the operation (optional).

  Raises:
    ValueError: if `images` is not a 3-D or 4-D tensor.

  Returns:
    The total variation of `images`.

    If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with
    the total variation for each image in the batch.
    If `images` was 3-D, return a scalar float with the total variation for
    that image.
  """

  with ops.name_scope(name, 'total_variation'):
    ndims = images.get_shape().ndims

    if ndims == 3:
      # The input is a single image with shape [height, width, channels].

      # Calculate the difference of neighboring pixel-values.
      # The images are shifted one pixel along the height and width by
      # slicing.
      pixel_dif1 = images[1:, :, :] - images[:-1, :, :]
      pixel_dif2 = images[:, 1:, :] - images[:, :-1, :]

      # Sum over all axes. (None is an alias for all axes.)
      sum_axis = None
    elif ndims == 4:
      # The input is a batch of images with shape:
      # [batch, height, width, channels].

      # Calculate the difference of neighboring pixel-values.
      # The images are shifted one pixel along the height and width by
      # slicing.
      pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :]
      pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :]

      # Only sum over the last 3 axes.
      # This results in a 1-D tensor with the total variation for each image.
      sum_axis = [1, 2, 3]
    else:
      raise ValueError('\'images\' must be either 3 or 4-dimensional.')

    # Calculate the total variation by taking the absolute value of the
    # pixel-differences and summing over the appropriate axis.
    tot_var = (
        math_ops.reduce_sum(math_ops.abs(pixel_dif1), axis=sum_axis) +
        math_ops.reduce_sum(math_ops.abs(pixel_dif2), axis=sum_axis))

  return tot_var
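

# Editor's note: a sketch (not part of the original module) checking the
# anisotropic total-variation formula by hand and showing the scalar-loss
# reduction recommended above. Assumes TF 2.x eager execution; names are ours.
def _example_total_variation_by_hand():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  images = tf.random.uniform([2, 5, 5, 3])
  tv = tf.image.total_variation(images)
  dh = tf.reduce_sum(
      tf.abs(images[:, 1:, :, :] - images[:, :-1, :, :]), axis=[1, 2, 3])
  dw = tf.reduce_sum(
      tf.abs(images[:, :, 1:, :] - images[:, :, :-1, :]), axis=[1, 2, 3])
  tf.debugging.assert_near(tv, dh + dw)
  loss = tf.reduce_sum(tv)  # scalar loss over the batch
  del loss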


@tf_export('image.sample_distorted_bounding_box', v1=[])
@dispatch.add_dispatch_support
def sample_distorted_bounding_box_v2(image_size,
                                     bounding_boxes,
                                     seed=0,
                                     min_object_covered=0.1,
                                     aspect_ratio_range=None,
                                     area_range=None,
                                     max_attempts=None,
                                     use_image_if_no_bounding_boxes=None,
                                     name=None):
  """Generate a single randomly distorted bounding box for an image.

  Bounding box annotations are often supplied in addition to ground-truth
  labels in image recognition or object localization tasks. A common
  technique for training such a system is to randomly distort an image while
  preserving its content, i.e. *data augmentation*. This Op outputs a
  randomly distorted localization of an object, i.e. bounding box, given an
  `image_size`, `bounding_boxes` and a series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop
  the image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the
  width and the height of the underlying image.

  For example,

  ```python
  # Generate a single distorted bounding box.
  begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
      tf.shape(image),
      bounding_boxes=bounding_boxes,
      min_object_covered=0.1)

  # Draw the bounding box in an image summary.
  image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                bbox_for_draw)
  tf.compat.v1.summary.image('images_with_box', image_with_box)

  # Employ the bounding box to distort the image.
  distorted_image = tf.slice(image, begin, size)
  ```

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = True` will assume there is a single
  implicit bounding box covering the whole image. If
  `use_image_if_no_bounding_boxes` is `False` and no bounding boxes are
  supplied, an error is raised.

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed`
  param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops
  guarantee the same results given the same seed independent of how many
  times the function is called, and independent of global seed settings
  (e.g. `tf.random.set_seed`).

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`,
      `int8`, `int16`, `int32`, `int64`. 1-D, containing `[height, width,
      channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N,
      4]` describing the N bounding boxes associated with the image.
    seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero,
      the random number generator is seeded by the given `seed`. Otherwise,
      it is seeded by a random seed.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be
      non-negative. In the case of 0, the cropped area does not need to
      overlap any of the bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio =
      width / height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an
      error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[offset_height, offset_width, 0]`. Provide as input to `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[target_height, target_width, -1]`. Provide as input to `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]`
      containing the distorted bounding box. Provide as input to
      `tf.image.draw_bounding_boxes`.

  Raises:
    ValueError: If no seed is specified and op determinism is enabled.
  """
  if seed:
    seed1, seed2 = random_seed.get_seed(seed)
  else:
    if config.is_op_determinism_enabled():
      raise ValueError(
          f'tf.image.sample_distorted_bounding_box requires a non-zero seed '
          f'to be passed in when determinism is enabled, but got seed={seed}. '
          f'Please pass in a non-zero seed, e.g. by passing "seed=1".')
    seed1, seed2 = (0, 0)
  with ops.name_scope(name, 'sample_distorted_bounding_box'):
    return gen_image_ops.sample_distorted_bounding_box_v2(
        image_size,
        bounding_boxes,
        seed=seed1,
        seed2=seed2,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)


@tf_export('image.stateless_sample_distorted_bounding_box', v1=[])
@dispatch.add_dispatch_support
def stateless_sample_distorted_bounding_box(image_size,
                                            bounding_boxes,
                                            seed,
                                            min_object_covered=0.1,
                                            aspect_ratio_range=None,
                                            area_range=None,
                                            max_attempts=None,
                                            use_image_if_no_bounding_boxes=None,
                                            name=None):
  """Generate a randomly distorted bounding box for an image deterministically.

  Bounding box annotations are often supplied in addition to ground-truth
  labels in image recognition or object localization tasks. A common
  technique for training such a system is to randomly distort an image while
  preserving its content, i.e. *data augmentation*. This Op, given the same
  `seed`, deterministically outputs a randomly distorted localization of an
  object, i.e. bounding box, given an `image_size`, `bounding_boxes` and a
  series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop
  the image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the
  width and the height of the underlying image.

  The output of this Op is guaranteed to be the same given the same `seed`
  and is independent of how many times the function is called, and
  independent of global seed settings (e.g. `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]])
  >>> bbox = tf.constant(
  ...     [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
  >>> seed = (1, 2)
  >>> # Generate a single distorted bounding box.
  >>> bbox_begin, bbox_size, bbox_draw = (
  ...     tf.image.stateless_sample_distorted_bounding_box(
  ...         tf.shape(image), bounding_boxes=bbox, seed=seed))
  >>> # Employ the bounding box to distort the image.
  >>> tf.slice(image, bbox_begin, bbox_size)
  <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy=
  array([[[1],
          [2]],
         [[4],
          [5]]])>
  >>> # Draw the bounding box in an image summary.
  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
  >>> tf.image.draw_bounding_boxes(
  ...     tf.expand_dims(tf.cast(image, tf.float32), 0), bbox_draw, colors)
  <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
  array([[[[1.],
           [1.],
           [3.]],
          [[1.],
           [1.],
           [6.]],
          [[7.],
           [8.],
           [9.]]]], dtype=float32)>

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = True` will assume there is a single
  implicit bounding box covering the whole image. If
  `use_image_if_no_bounding_boxes` is `False` and no bounding boxes are
  supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`,
      `int8`, `int16`, `int32`, `int64`. 1-D, containing `[height, width,
      channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N,
      4]` describing the N bounding boxes associated with the image.
    seed: A shape [2] Tensor, the seed to the random number generator. Must
      have dtype `int32` or `int64`. (When using XLA, only `int32` is
      allowed.)
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be
      non-negative. In the case of 0, the cropped area does not need to
      overlap any of the bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio =
      width / height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an
      error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[offset_height, offset_width, 0]`. Provide as input to `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[target_height, target_width, -1]`. Provide as input to `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]`
      containing the distorted bounding box. Provide as input to
      `tf.image.draw_bounding_boxes`.
  """
  with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'):
    return gen_image_ops.stateless_sample_distorted_bounding_box(
        image_size=image_size,
        bounding_boxes=bounding_boxes,
        seed=seed,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)


@tf_export(v1=['image.sample_distorted_bounding_box'])
@dispatch.add_dispatch_support
@deprecation.deprecated(
    date=None,
    instructions='`seed2` arg is deprecated. '
    'Use sample_distorted_bounding_box_v2 instead.')
def sample_distorted_bounding_box(image_size,
                                  bounding_boxes,
                                  seed=None,
                                  seed2=None,
                                  min_object_covered=0.1,
                                  aspect_ratio_range=None,
                                  area_range=None,
                                  max_attempts=None,
                                  use_image_if_no_bounding_boxes=None,
                                  name=None):
  """Generate a single randomly distorted bounding box for an image.

  Bounding box annotations are often supplied in addition to ground-truth
  labels in image recognition or object localization tasks. A common
  technique for training such a system is to randomly distort an image while
  preserving its content, i.e. *data augmentation*. This Op outputs a
  randomly distorted localization of an object, i.e. bounding box, given an
  `image_size`, `bounding_boxes` and a series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop
  the image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the
  width and height of the underlying image.

  For example,

  ```python
  # Generate a single distorted bounding box.
  begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
      tf.shape(image),
      bounding_boxes=bounding_boxes,
      min_object_covered=0.1)

  # Draw the bounding box in an image summary.
  image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                bbox_for_draw)
  tf.compat.v1.summary.image('images_with_box', image_with_box)

  # Employ the bounding box to distort the image.
  distorted_image = tf.slice(image, begin, size)
  ```

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = True` will assume there is a single
  implicit bounding box covering the whole image. If
  `use_image_if_no_bounding_boxes` is `False` and no bounding boxes are
  supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`,
      `int8`, `int16`, `int32`, `int64`. 1-D, containing `[height, width,
      channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N,
      4]` describing the N bounding boxes associated with the image.
    seed: An optional `int`. Defaults to `None` (treated as `0`). If either
      `seed` or `seed2` is set to non-zero, the random number generator is
      seeded by the given `seed`. Otherwise, it is seeded by a random seed.
    seed2: An optional `int`. Defaults to `None` (treated as `0`). A second
      seed to avoid seed collision.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be
      non-negative. In the case of 0, the cropped area does not need to
      overlap any of the bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio =
      width / height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an
      error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[offset_height, offset_width, 0]`. Provide as input to `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[target_height, target_width, -1]`. Provide as input to `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]`
      containing the distorted bounding box. Provide as input to
      `tf.image.draw_bounding_boxes`.

  Raises:
    ValueError: If no seed is specified and op determinism is enabled.
  """
  if not seed and not seed2 and config.is_op_determinism_enabled():
    raise ValueError(
        f'tf.compat.v1.image.sample_distorted_bounding_box requires "seed" or '
        f'"seed2" to be non-zero when determinism is enabled. Please pass in '
        f'a non-zero seed, e.g. by passing "seed=1". Got seed={seed} and '
        f'seed2={seed2}')
  with ops.name_scope(name, 'sample_distorted_bounding_box'):
    return gen_image_ops.sample_distorted_bounding_box_v2(
        image_size,
        bounding_boxes,
        seed=seed,
        seed2=seed2,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)


@tf_export('image.non_max_suppression')
@dispatch.add_dispatch_support
def non_max_suppression(boxes,
                        scores,
                        max_output_size,
                        iou_threshold=0.5,
                        score_threshold=float('-inf'),
                        name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes. Bounding boxes are supplied as
  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of
  any diagonal pair of box corners, and the coordinates can be provided as
  normalized (i.e., lying in the interval `[0, 1]`) or absolute. Note that
  this algorithm is agnostic to where the origin is in the coordinate system,
  and is invariant to orthogonal transformations and translations of the
  coordinate system; thus translating or reflecting the coordinate system
  results in the same boxes being selected by the algorithm.

  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes. The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation. For example:

  ```python
  selected_indices = tf.image.non_max_suppression(
      boxes, scores, max_output_size, iou_threshold)
  selected_boxes = tf.gather(boxes, selected_indices)
  ```

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    iou_threshold: A 0-D float tensor representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    score_threshold: A 0-D float tensor representing the threshold for
      deciding when to remove boxes based on score.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
  """
  with ops.name_scope(name, 'non_max_suppression'):
    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
    score_threshold = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size,
                                                iou_threshold, score_threshold)
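

# Editor's note: a concrete sketch (not part of the original module) of hard
# NMS with a near-duplicate box, assuming TF 2.x eager execution; the boxes
# and the helper name are ours.
def _example_non_max_suppression():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],    # box 0
                       [0.0, 0.05, 1.0, 1.05],  # near-duplicate of box 0
                       [0.0, 2.0, 1.0, 3.0]])   # disjoint from box 0
  scores = tf.constant([0.9, 0.8, 0.7])
  selected = tf.image.non_max_suppression(
      boxes, scores, max_output_size=3, iou_threshold=0.5)
  # Box 1 overlaps box 0 with IOU ~0.9 > 0.5 and is suppressed.
  assert selected.numpy().tolist() == [0, 2]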


@tf_export('image.non_max_suppression_with_scores')
@dispatch.add_dispatch_support
def non_max_suppression_with_scores(boxes,
                                    scores,
                                    max_output_size,
                                    iou_threshold=0.5,
                                    score_threshold=float('-inf'),
                                    soft_nms_sigma=0.0,
                                    name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes. Bounding boxes are supplied as
  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of
  any diagonal pair of box corners, and the coordinates can be provided as
  normalized (i.e., lying in the interval `[0, 1]`) or absolute. Note that
  this algorithm is agnostic to where the origin is in the coordinate system,
  and is invariant to orthogonal transformations and translations of the
  coordinate system; thus translating or reflecting the coordinate system
  results in the same boxes being selected by the algorithm.

  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes. The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation. For example:

  ```python
  selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(
      boxes, scores, max_output_size, iou_threshold=1.0, score_threshold=0.1,
      soft_nms_sigma=0.5)
  selected_boxes = tf.gather(boxes, selected_indices)
  ```

  This function generalizes the `tf.image.non_max_suppression` op by also
  supporting a Soft-NMS (with Gaussian weighting) mode (c.f.
  Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
  of other overlapping boxes instead of directly causing them to be pruned.
  Consequently, in contrast to `tf.image.non_max_suppression`,
  `tf.image.non_max_suppression_with_scores` returns the new scores of each
  input box in the second output, `selected_scores`.

  To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
  larger than 0. When `soft_nms_sigma` equals 0, the behavior of
  `tf.image.non_max_suppression_with_scores` is identical to that of
  `tf.image.non_max_suppression` (except for the extra output) both in
  function and in running time.

  Note that when `soft_nms_sigma` > 0, Soft-NMS is performed and
  `iou_threshold` is ignored. `iou_threshold` is only used for standard NMS.

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    iou_threshold: A 0-D float tensor representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    score_threshold: A 0-D float tensor representing the threshold for
      deciding when to remove boxes based on score.
    soft_nms_sigma: A 0-D float tensor representing the sigma parameter for
      Soft NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503). When
      `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard)
      NMS.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
    selected_scores: A 1-D float tensor of shape `[M]` representing the
      corresponding scores for each selected box, where
      `M <= max_output_size`. Scores only differ from the corresponding input
      scores when using Soft NMS (i.e. when `soft_nms_sigma > 0`).
  """
  with ops.name_scope(name, 'non_max_suppression_with_scores'):
    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
    score_threshold = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    soft_nms_sigma = ops.convert_to_tensor(
        soft_nms_sigma, name='soft_nms_sigma')
    (selected_indices, selected_scores,
     _) = gen_image_ops.non_max_suppression_v5(
         boxes,
         scores,
         max_output_size,
         iou_threshold,
         score_threshold,
         soft_nms_sigma,
         pad_to_max_output_size=False)
    return selected_indices, selected_scores
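

# Editor's note: a sketch (not part of the original module) contrasting
# Soft-NMS with hard NMS on the same overlapping pair. Assumes TF 2.x eager
# execution; the boxes and the helper name are ours.
def _example_soft_nms_decays_scores():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                       [0.0, 0.05, 1.0, 1.05]])  # IOU ~0.9 with box 0
  scores = tf.constant([0.9, 0.8])
  indices, new_scores = tf.image.non_max_suppression_with_scores(
      boxes, scores, max_output_size=2, soft_nms_sigma=0.5)
  # With soft_nms_sigma > 0 the overlapping box is kept, but its score is
  # decayed by a Gaussian penalty rather than being pruned outright.
  assert indices.shape[0] == 2
  assert float(new_scores[1]) < 0.8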


@tf_export('image.non_max_suppression_overlaps')
@dispatch.add_dispatch_support
def non_max_suppression_with_overlaps(overlaps,
                                      scores,
                                      max_output_size,
                                      overlap_threshold=0.5,
                                      score_threshold=float('-inf'),
                                      name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high overlap with previously selected boxes.
  The n-by-n pairwise overlap values are supplied as a square matrix.

  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes. The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation. For example:

  ```python
  selected_indices = tf.image.non_max_suppression_overlaps(
      overlaps, scores, max_output_size, overlap_threshold)
  selected_boxes = tf.gather(boxes, selected_indices)
  ```

  Args:
    overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]`
      representing the n-by-n box overlap values.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    overlap_threshold: A 0-D float tensor representing the threshold for
      deciding whether boxes overlap too much with respect to the provided
      overlap values.
    score_threshold: A 0-D float tensor representing the threshold for
      deciding when to remove boxes based on score.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the overlaps tensor, where `M <= max_output_size`.
  """
  with ops.name_scope(name, 'non_max_suppression_overlaps'):
    overlap_threshold = ops.convert_to_tensor(
        overlap_threshold, name='overlap_threshold')
    # pylint: disable=protected-access
    return gen_image_ops.non_max_suppression_with_overlaps(
        overlaps, scores, max_output_size, overlap_threshold, score_threshold)
    # pylint: enable=protected-access


_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115],
                      [0.587, -0.27455667, -0.52273617],
                      [0.114, -0.32134392, 0.31119955]]


@tf_export('image.rgb_to_yiq')
@dispatch.add_dispatch_support
def rgb_to_yiq(images):
  """Converts one or more images from RGB to YIQ.

  Outputs a tensor of the same shape as the `images` tensor, containing the
  YIQ value of the pixels.
  The output is only well defined if the values in `images` are in [0, 1].

  Usage Example:

  >>> x = tf.constant([[[1.0, 2.0, 3.0]]])
  >>> tf.image.rgb_to_yiq(x)
  <tf.Tensor: shape=(1, 1, 3), dtype=float32,
  numpy=array([[[ 1.815     , -0.91724455,  0.09962624]]], dtype=float32)>

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  kernel = ops.convert_to_tensor(
      _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel')
  ndims = images.get_shape().ndims
  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])


_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021],
                      [0.6208248, -0.64720424, 1.70423049]]


@tf_export('image.yiq_to_rgb')
@dispatch.add_dispatch_support
def yiq_to_rgb(images):
  """Converts one or more images from YIQ to RGB.

  Outputs a tensor of the same shape as the `images` tensor, containing the
  RGB value of the pixels.
  The output is only well defined if the Y values in `images` are in [0, 1],
  the I values are in [-0.5957, 0.5957], and the Q values are in
  [-0.5226, 0.5226].

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  kernel = ops.convert_to_tensor(
      _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel')
  ndims = images.get_shape().ndims
  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
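

# Editor's note: a round-trip sketch (not part of the original module): the
# two YIQ kernels above are approximate inverses of each other. Assumes TF
# 2.x eager execution; the helper name and tolerance are ours.
def _example_yiq_roundtrip():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  rgb = tf.random.uniform([4, 4, 3])
  recovered = tf.image.yiq_to_rgb(tf.image.rgb_to_yiq(rgb))
  tf.debugging.assert_near(rgb, recovered, atol=1e-3)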


_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538],
                      [0.587, -0.28886916, -0.51496512],
                      [0.114, 0.43601035, -0.10001026]]


@tf_export('image.rgb_to_yuv')
@dispatch.add_dispatch_support
def rgb_to_yuv(images):
  """Converts one or more images from RGB to YUV.

  Outputs a tensor of the same shape as the `images` tensor, containing the
  YUV value of the pixels.
  The output is only well defined if the values in `images` are in [0, 1].
  There are two common ways of representing an image: with pixel values in
  the [0, 255] range, or as floats in the [0, 1] range. Users need to convert
  the input image into the float [0, 1] range before calling this function.

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  kernel = ops.convert_to_tensor(
      _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel')
  ndims = images.get_shape().ndims
  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])


_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185],
                      [1.13988303, -0.58062185, 0]]


@tf_export('image.yuv_to_rgb')
@dispatch.add_dispatch_support
def yuv_to_rgb(images):
  """Converts one or more images from YUV to RGB.

  Outputs a tensor of the same shape as the `images` tensor, containing the
  RGB value of the pixels.
  The output is only well defined if the Y values in `images` are in [0, 1],
  and the U and V values are in [-0.5, 0.5].

  As per the above description, you need to scale your YUV images if their
  pixel values are not in the required range. The example below illustrates
  preprocessing of each channel of images before feeding them to
  `yuv_to_rgb`.

  ```python
  yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255)
  last_dimension_axis = len(yuv_images.shape) - 1
  yuv_tensor_images = tf.truediv(
      tf.subtract(
          yuv_images,
          tf.reduce_min(yuv_images)
      ),
      tf.subtract(
          tf.reduce_max(yuv_images),
          tf.reduce_min(yuv_images)
      )
  )
  y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis)
  target_uv_min, target_uv_max = -0.5, 0.5
  u = u * (target_uv_max - target_uv_min) + target_uv_min
  v = v * (target_uv_max - target_uv_min) + target_uv_min
  preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis)
  rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images)
  ```

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  kernel = ops.convert_to_tensor(
      _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel')
  ndims = images.get_shape().ndims
  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
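

# Editor's note: the matching round-trip sketch (not part of the original
# module) for the YUV kernels above; rgb_to_yuv output already lies in the
# ranges yuv_to_rgb expects. Assumes TF 2.x eager execution; the helper name
# and tolerance are ours.
def _example_yuv_roundtrip():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  rgb = tf.random.uniform([4, 4, 3])
  yuv = tf.image.rgb_to_yuv(rgb)  # Y in [0, 1]; U, V roughly in [-0.5, 0.5]
  tf.debugging.assert_near(rgb, tf.image.yuv_to_rgb(yuv), atol=1e-3)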


def _verify_compatible_image_shapes(img1, img2):
  """Checks if two image tensors are compatible for applying SSIM or PSNR.

  This function checks if two sets of images have ranks at least 3, and if
  the last three dimensions match.

  Args:
    img1: Tensor containing the first image batch.
    img2: Tensor containing the second image batch.

  Returns:
    A tuple containing: the first tensor shape, the second tensor shape, and
    a list of control_flow_assert.Assert() ops implementing the checks.

  Raises:
    ValueError: When static shape check fails.
  """
  shape1 = img1.get_shape().with_rank_at_least(3)
  shape2 = img2.get_shape().with_rank_at_least(3)
  shape1[-3:].assert_is_compatible_with(shape2[-3:])

  if shape1.ndims is not None and shape2.ndims is not None:
    for dim1, dim2 in zip(
        reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])):
      if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
        raise ValueError('Two images are not compatible: %s and %s' %
                         (shape1, shape2))

  # Now assign shape tensors.
  shape1, shape2 = array_ops.shape_n([img1, img2])

  # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
  checks = []
  checks.append(
      control_flow_assert.Assert(
          math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2],
          summarize=10))
  checks.append(
      control_flow_assert.Assert(
          math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
          [shape1, shape2],
          summarize=10))
  return shape1, shape2, checks


@tf_export('image.psnr')
@dispatch.add_dispatch_support
def psnr(a, b, max_val, name=None):
  """Returns the Peak Signal-to-Noise Ratio between a and b.

  This is intended to be used on signals (or images). Produces a PSNR value
  for each image in batch.

  The last three dimensions of input are expected to be [height, width,
  depth].

  Example:

  ```python
  # Read images from file.
  im1 = tf.image.decode_png(tf.io.read_file('path/to/im1.png'))
  im2 = tf.image.decode_png(tf.io.read_file('path/to/im2.png'))
  # Compute PSNR over tf.uint8 Tensors.
  psnr1 = tf.image.psnr(im1, im2, max_val=255)

  # Compute PSNR over tf.float32 Tensors.
  im1 = tf.image.convert_image_dtype(im1, tf.float32)
  im2 = tf.image.convert_image_dtype(im2, tf.float32)
  psnr2 = tf.image.psnr(im1, im2, max_val=1.0)
  # psnr1 and psnr2 both have type tf.float32 and are almost equal.
  ```

  Args:
    a: First set of images.
    b: Second set of images.
    max_val: The dynamic range of the images (i.e., the difference between
      the maximum and the minimum allowed values).
    name: Namespace to embed the computation in.

  Returns:
    The PSNR between `a` and `b`. The returned tensor has type `tf.float32`
    and shape [batch_size, 1].
  """
  with ops.name_scope(name, 'PSNR', [a, b]):
    # Need to convert the images to float32. Scale max_val accordingly so
    # that PSNR is computed correctly.
    max_val = math_ops.cast(max_val, a.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    a = convert_image_dtype(a, dtypes.float32)
    b = convert_image_dtype(b, dtypes.float32)
    mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1])
    psnr_val = math_ops.subtract(
        20 * math_ops.log(max_val) / math_ops.log(10.0),
        np.float32(10 / np.log(10)) * math_ops.log(mse),
        name='psnr')

    _, _, checks = _verify_compatible_image_shapes(a, b)
    with ops.control_dependencies(checks):
      return array_ops.identity(psnr_val)
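

# Editor's note: a sketch (not part of the original module) verifying the
# PSNR formula used above, PSNR = 20*log10(max_val) - 10*log10(MSE). Assumes
# TF 2.x eager execution; the helper name is ours.
def _example_psnr_by_hand():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  a = tf.random.uniform([1, 16, 16, 3])
  b = tf.random.uniform([1, 16, 16, 3])
  mse = tf.reduce_mean(tf.math.squared_difference(a, b), axis=[-3, -2, -1])
  by_hand = -10.0 * tf.math.log(mse) / tf.math.log(10.0)  # max_val = 1.0
  tf.debugging.assert_near(tf.image.psnr(a, b, max_val=1.0), by_hand,
                           atol=1e-4)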


def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03):
  r"""Helper function for computing SSIM.

  SSIM estimates covariances with weighted sums. The default parameters
  use a biased estimate of the covariance:
  Suppose `reducer` is a weighted sum, then the mean estimators are
    \mu_x = \sum_i w_i x_i,
    \mu_y = \sum_i w_i y_i,
  where the w_i's are the weighted-sum weights, and the covariance estimator
  is
    cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
  with the assumption \sum_i w_i = 1. This covariance estimator is biased,
  since
    E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y).
  For an SSIM measure with unbiased covariance estimators, pass
  (1 - \sum_i w_i ^ 2) as the `compensation` argument.

  Args:
    x: First set of images.
    y: Second set of images.
    reducer: Function that computes 'local' averages from the set of images.
      For the non-convolutional version, this is usually
      tf.reduce_mean(x, [1, 2]), and for the convolutional version, this is
      usually tf.nn.avg_pool2d or tf.nn.conv2d with a weighted-sum kernel.
    max_val: The dynamic range (i.e., the difference between the maximum
      possible allowed value and the minimum allowed value).
    compensation: Compensation factor. See above.
    k1: Default value 0.01.
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).

  Returns:
    A pair containing the luminance measure, and the contrast-structure
    measure.
  """

  c1 = (k1 * max_val)**2
  c2 = (k2 * max_val)**2

  # SSIM luminance measure is
  # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1).
  mean0 = reducer(x)
  mean1 = reducer(y)
  num0 = mean0 * mean1 * 2.0
  den0 = math_ops.square(mean0) + math_ops.square(mean1)
  luminance = (num0 + c1) / (den0 + c1)

  # SSIM contrast-structure measure is
  # (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2).
  # Note that `reducer` is a weighted sum with weights w_i, \sum_i w_i = 1,
  # so
  #   cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
  #            = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j).
  num1 = reducer(x * y) * 2.0
  den1 = reducer(math_ops.square(x) + math_ops.square(y))
  c2 *= compensation
  cs = (num1 - num0 + c2) / (den1 - den0 + c2)

  # SSIM score is the product of the luminance and contrast-structure
  # measures.
  return luminance, cs


def _fspecial_gauss(size, sigma):
  """Function to mimic the 'fspecial' gaussian MATLAB function."""
  size = ops.convert_to_tensor(size, dtypes.int32)
  sigma = ops.convert_to_tensor(sigma)

  coords = math_ops.cast(math_ops.range(size), sigma.dtype)
  coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0

  g = math_ops.square(coords)
  g *= -0.5 / math_ops.square(sigma)

  g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1])
  g = array_ops.reshape(g, shape=[1, -1])  # For tf.nn.softmax().
  g = nn_ops.softmax(g)
  return array_ops.reshape(g, shape=[size, size, 1, 1])
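

# Editor's note: a sketch (not part of the original module) of the softmax
# trick used in _fspecial_gauss above: softmax over g_i + g_j yields an
# L1-normalized outer product of two 1-D Gaussians, since
# exp(g_i + g_j) = exp(g_i) * exp(g_j). Assumes TF 2.x eager execution; names
# here are ours.
def _example_fspecial_gauss_normalization():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  size, sigma = 11, 1.5
  coords = tf.cast(tf.range(size), tf.float32) - (size - 1) / 2.0
  g = -0.5 * tf.square(coords) / sigma**2
  kernel = tf.nn.softmax(tf.reshape(g[:, None] + g[None, :], [1, -1]))
  # The kernel sums to one, exactly like an explicitly normalized Gaussian.
  tf.debugging.assert_near(tf.reduce_sum(kernel), 1.0)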


def _ssim_per_channel(img1,
                      img2,
                      max_val=1.0,
                      filter_size=11,
                      filter_sigma=1.5,
                      k1=0.01,
                      k2=0.03,
                      return_index_map=False):
  """Computes SSIM index between img1 and img2 per color channel.

  This function matches the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Details:
  - An 11x11 Gaussian filter of width 1.5 is used.
  - k1 = 0.01, k2 = 0.03 as in the original paper.

  Args:
    img1: First image batch.
    img2: Second image batch.
    max_val: The dynamic range of the images (i.e., the difference between
      the maximum and the minimum allowed values).
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01.
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).
    return_index_map: If True returns local SSIM map instead of the global
      mean.

  Returns:
    A pair of tensors containing the channel-wise SSIM and contrast-structure
    values. The shape is [..., channels].
  """
  filter_size = constant_op.constant(filter_size, dtype=dtypes.int32)
  filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype)

  shape1, shape2 = array_ops.shape_n([img1, img2])
  checks = [
      control_flow_assert.Assert(
          math_ops.reduce_all(
              math_ops.greater_equal(shape1[-3:-1], filter_size)),
          [shape1, filter_size],
          summarize=8),
      control_flow_assert.Assert(
          math_ops.reduce_all(
              math_ops.greater_equal(shape2[-3:-1], filter_size)),
          [shape2, filter_size],
          summarize=8)
  ]

  # Enforce the check to run before computation.
  with ops.control_dependencies(checks):
    img1 = array_ops.identity(img1)

  # TODO(sjhwang): Try to cache kernels and compensation factor.
  kernel = _fspecial_gauss(filter_size, filter_sigma)
  kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1])

  # The correct compensation factor is
  # `1.0 - tf.reduce_sum(tf.square(kernel))`, but to match the MATLAB
  # implementation of MS-SSIM, we use 1.0 instead.
  compensation = 1.0

  # TODO(sjhwang): Try FFT.
  # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying
  #   1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
  def reducer(x):
    shape = array_ops.shape(x)
    x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
    y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
    return array_ops.reshape(
        y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))

  luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1,
                               k2)

  # Average over the second and the third from the last: height, width.
  if return_index_map:
    ssim_val = luminance * cs
  else:
    axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
    ssim_val = math_ops.reduce_mean(luminance * cs, axes)
    cs = math_ops.reduce_mean(cs, axes)
  return ssim_val, cs


@tf_export('image.ssim')
@dispatch.add_dispatch_support
def ssim(img1,
         img2,
         max_val,
         filter_size=11,
         filter_sigma=1.5,
         k1=0.01,
         k2=0.03,
         return_index_map=False):
  """Computes SSIM index between img1 and img2.

  This function is based on the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Note: The true SSIM is only defined on grayscale. This function does not
  perform any colorspace transform. (If the input is already YUV, then it
  will compute YUV SSIM average.)

  Details:
  - An 11x11 Gaussian filter of width 1.5 is used.
  - k1 = 0.01, k2 = 0.03 as in the original paper.

  The image sizes must be at least 11x11 because of the filter size.

  Example:

  ```python
  # Read images (of size 255 x 255) from file.
  im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png'))
  im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png'))
  tf.shape(im1)  # `im1.png` has 3 channels; shape is `(255, 255, 3)`
  tf.shape(im2)  # `im2.png` has 3 channels; shape is `(255, 255, 3)`
  # Add an outer batch dimension to each image.
  im1 = tf.expand_dims(im1, axis=0)
  im2 = tf.expand_dims(im2, axis=0)
  # Compute SSIM over tf.uint8 Tensors.
  ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11,
                        filter_sigma=1.5, k1=0.01, k2=0.03)

  # Compute SSIM over tf.float32 Tensors.
  im1 = tf.image.convert_image_dtype(im1, tf.float32)
  im2 = tf.image.convert_image_dtype(im2, tf.float32)
  ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11,
                        filter_sigma=1.5, k1=0.01, k2=0.03)
  # ssim1 and ssim2 both have type tf.float32 and are almost equal.
  ```

  Args:
    img1: First image batch. 4-D Tensor of shape `[batch, height, width,
      channels]` with only positive pixel values.
    img2: Second image batch. 4-D Tensor of shape `[batch, height, width,
      channels]` with only positive pixel values.
    max_val: The dynamic range of the images (i.e., the difference between
      the maximum and the minimum allowed values).
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01.
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).
    return_index_map: If True returns local SSIM map instead of the global
      mean.

  Returns:
    A tensor containing an SSIM value for each image in batch, or a tensor
    containing an SSIM value for each pixel for each image in batch if
    return_index_map is True. Returned SSIM values are in range (-1, 1] when
    pixel values are non-negative. Returns a tensor with shape:
    broadcast(img1.shape[:-3], img2.shape[:-3]) or broadcast(img1.shape[:-1],
    img2.shape[:-1]).
  """
  with ops.name_scope(None, 'SSIM', [img1, img2]):
    # Convert to tensor if needed.
    img1 = ops.convert_to_tensor(img1, name='img1')
    img2 = ops.convert_to_tensor(img2, name='img2')
    # Shape checking.
    _, _, checks = _verify_compatible_image_shapes(img1, img2)
    with ops.control_dependencies(checks):
      img1 = array_ops.identity(img1)

    # Need to convert the images to float32. Scale max_val accordingly so
    # that SSIM is computed correctly.
    max_val = math_ops.cast(max_val, img1.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    img1 = convert_image_dtype(img1, dtypes.float32)
    img2 = convert_image_dtype(img2, dtypes.float32)
    ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val, filter_size,
                                            filter_sigma, k1, k2,
                                            return_index_map)
    # Compute average over color channels.
    return math_ops.reduce_mean(ssim_per_channel, [-1])
4462# Default values obtained by Wang et al.
4463_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
4466@tf_export('image.ssim_multiscale')
4467@dispatch.add_dispatch_support
4468def ssim_multiscale(img1,
4469 img2,
4470 max_val,
4471 power_factors=_MSSSIM_WEIGHTS,
4472 filter_size=11,
4473 filter_sigma=1.5,
4474 k1=0.01,
4475 k2=0.03):
4476 """Computes the MS-SSIM between img1 and img2.
4478 This function assumes that `img1` and `img2` are image batches, i.e. the last
4479 three dimensions are [height, width, channels].
4481 Note: The true SSIM is only defined on grayscale. This function does not
4482 perform any colorspace transform. (If the input is already YUV, then it will
4483 compute YUV SSIM average.)
4485 Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
4486 structural similarity for image quality assessment." Signals, Systems and
4487 Computers, 2004.
4489 Args:
4490 img1: First image batch with only Positive Pixel Values.
4491 img2: Second image batch with only Positive Pixel Values. Must have the
4492 same rank as img1.
4493 max_val: The dynamic range of the images (i.e., the difference between the
4494 maximum the and minimum allowed values).
4495 power_factors: Iterable of weights for each of the scales. The number of
4496 scales used is the length of the list. Index 0 is the unscaled
4497 resolution's weight and each increasing scale corresponds to the image
4498 being downsampled by 2. Defaults to (0.0448, 0.2856, 0.3001, 0.2363,
4499 0.1333), which are the values obtained in the original paper.
4500    filter_size: Default value 11 (size of the Gaussian filter).
4501    filter_sigma: Default value 1.5 (width of the Gaussian filter).
4502    k1: Default value 0.01.
4503    k2: Default value 0.03 (SSIM is less sensitive to k2 for lower values, so
4504      it is preferable to take values in the range 0 < k2 < 0.4).
4506 Returns:
4507 A tensor containing an MS-SSIM value for each image in batch. The values
4508 are in range [0, 1]. Returns a tensor with shape:
4509 broadcast(img1.shape[:-3], img2.shape[:-3]).
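
  A minimal sketch with random image batches (sizes are illustrative and
  chosen so that the smallest of the five default scales, 256 / 2**4 = 16,
  still fits the default 11x11 filter):

  ```python
  im1 = tf.random.uniform([2, 256, 256, 3], maxval=1.0)
  im2 = tf.random.uniform([2, 256, 256, 3], maxval=1.0)
  msssim = tf.image.ssim_multiscale(im1, im2, max_val=1.0)  # shape [2]
  ```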
4510 """
4511 with ops.name_scope(None, 'MS-SSIM', [img1, img2]):
4512 # Convert to tensor if needed.
4513 img1 = ops.convert_to_tensor(img1, name='img1')
4514 img2 = ops.convert_to_tensor(img2, name='img2')
4515 # Shape checking.
4516 shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2)
4517 with ops.control_dependencies(checks):
4518 img1 = array_ops.identity(img1)
4520 # Need to convert the images to float32. Scale max_val accordingly so that
4521 # SSIM is computed correctly.
4522 max_val = math_ops.cast(max_val, img1.dtype)
4523 max_val = convert_image_dtype(max_val, dtypes.float32)
4524 img1 = convert_image_dtype(img1, dtypes.float32)
4525 img2 = convert_image_dtype(img2, dtypes.float32)
4527 imgs = [img1, img2]
4528 shapes = [shape1, shape2]
4530 # img1 and img2 are assumed to be a (multi-dimensional) batch of
4531 # 3-dimensional images (height, width, channels). `heads` contain the batch
4532 # dimensions, and `tails` contain the image dimensions.
4533 heads = [s[:-3] for s in shapes]
4534 tails = [s[-3:] for s in shapes]
4536 divisor = [1, 2, 2, 1]
4537 divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32)
4539 def do_pad(images, remainder):
4540 padding = array_ops.expand_dims(remainder, -1)
4541 padding = array_ops.pad(padding, [[1, 0], [1, 0]])
4542 return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images]
4544 mcs = []
4545 for k in range(len(power_factors)):
4546 with ops.name_scope(None, 'Scale%d' % k, imgs):
4547 if k > 0:
4548 # Avg pool takes rank 4 tensors. Flatten leading dimensions.
4549 flat_imgs = [
4550 array_ops.reshape(x, array_ops.concat([[-1], t], 0))
4551 for x, t in zip(imgs, tails)
4552 ]
4554 remainder = tails[0] % divisor_tensor
4555 need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0))
4556 # pylint: disable=cell-var-from-loop
4557 padded = tf_cond.cond(need_padding,
4558 lambda: do_pad(flat_imgs, remainder),
4559 lambda: flat_imgs)
4560 # pylint: enable=cell-var-from-loop
4562 downscaled = [
4563 nn_ops.avg_pool(
4564 x, ksize=divisor, strides=divisor, padding='VALID')
4565 for x in padded
4566 ]
4567 tails = [x[1:] for x in array_ops.shape_n(downscaled)]
4568 imgs = [
4569 array_ops.reshape(x, array_ops.concat([h, t], 0))
4570 for x, h, t in zip(downscaled, heads, tails)
4571 ]
4573 # Overwrite previous ssim value since we only need the last one.
4574 ssim_per_channel, cs = _ssim_per_channel(
4575 *imgs,
4576 max_val=max_val,
4577 filter_size=filter_size,
4578 filter_sigma=filter_sigma,
4579 k1=k1,
4580 k2=k2)
4581 mcs.append(nn_ops.relu(cs))
4583 # Remove the cs score for the last scale. In the MS-SSIM calculation,
4584 # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
4585    mcs.pop()
4586 mcs_and_ssim = array_ops_stack.stack(
4587 mcs + [nn_ops.relu(ssim_per_channel)], axis=-1)
4588 # Take weighted geometric mean across the scale axis.
4589 ms_ssim = math_ops.reduce_prod(
4590 math_ops.pow(mcs_and_ssim, power_factors), [-1])
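    # That is, ms_ssim = prod_{j<M} cs_j ** w_j * ssim_M ** w_M: the weighted
    # geometric mean of the contrast-structure terms with the full SSIM of the
    # coarsest scale (Wang et al.).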
4592 return math_ops.reduce_mean(ms_ssim, [-1]) # Avg over color channels.
4595@tf_export('image.image_gradients')
4596@dispatch.add_dispatch_support
4597def image_gradients(image):
4598 """Returns image gradients (dy, dx) for each color channel.
4600 Both output tensors have the same shape as the input: [batch_size, h, w,
4601 d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in
4602 location (x, y). That means that dy will always have zeros in the last row,
4603 and dx will always have zeros in the last column.
4605 Usage Example:
4606 ```python
4607 BATCH_SIZE = 1
4608 IMAGE_HEIGHT = 5
4609 IMAGE_WIDTH = 5
4610 CHANNELS = 1
4611 image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
4612 delta=1, dtype=tf.float32),
4613 shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
4614 dy, dx = tf.image.image_gradients(image)
4615 print(image[0, :,:,0])
4616 tf.Tensor(
4617 [[ 0. 1. 2. 3. 4.]
4618 [ 5. 6. 7. 8. 9.]
4619 [10. 11. 12. 13. 14.]
4620 [15. 16. 17. 18. 19.]
4621 [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32)
4622 print(dy[0, :,:,0])
4623 tf.Tensor(
4624 [[5. 5. 5. 5. 5.]
4625 [5. 5. 5. 5. 5.]
4626 [5. 5. 5. 5. 5.]
4627 [5. 5. 5. 5. 5.]
4628 [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32)
4629 print(dx[0, :,:,0])
4630 tf.Tensor(
4631 [[1. 1. 1. 1. 0.]
4632 [1. 1. 1. 1. 0.]
4633 [1. 1. 1. 1. 0.]
4634 [1. 1. 1. 1. 0.]
4635 [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32)
4636 ```
4638 Args:
4639 image: Tensor with shape [batch_size, h, w, d].
4641 Returns:
4642 Pair of tensors (dy, dx) holding the vertical and horizontal image
4643 gradients (1-step finite difference).
4645 Raises:
4646 ValueError: If `image` is not a 4D tensor.
4647 """
4648 if image.get_shape().ndims != 4:
4649 raise ValueError('image_gradients expects a 4D tensor '
4650 '[batch_size, h, w, d], not {}.'.format(image.get_shape()))
4651 image_shape = array_ops.shape(image)
4652 batch_size, height, width, depth = array_ops_stack.unstack(image_shape)
4653 dy = image[:, 1:, :, :] - image[:, :-1, :, :]
4654 dx = image[:, :, 1:, :] - image[:, :, :-1, :]
4656 # Return tensors with same size as original image by concatenating
4657 # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y).
4658 shape = array_ops_stack.stack([batch_size, 1, width, depth])
4659 dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1)
4660 dy = array_ops.reshape(dy, image_shape)
4662 shape = array_ops_stack.stack([batch_size, height, 1, depth])
4663 dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2)
4664 dx = array_ops.reshape(dx, image_shape)
4666 return dy, dx
4669@tf_export('image.sobel_edges')
4670@dispatch.add_dispatch_support
4671def sobel_edges(image):
4672 """Returns a tensor holding Sobel edge maps.
4674 Example usage:
4676 For general usage, `image` would be loaded from a file as below:
4678 ```python
4679 image_bytes = tf.io.read_file(path_to_image_file)
4680 image = tf.image.decode_image(image_bytes)
4681 image = tf.cast(image, tf.float32)
4682 image = tf.expand_dims(image, 0)
4683 ```
4684 But for demo purposes, we are using randomly generated values for `image`:
4686 >>> image = tf.random.uniform(
4687 ... maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32)
4688 >>> sobel = tf.image.sobel_edges(image)
4689 >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction
4690 >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction
4692 For displaying the sobel results, PIL's [Image Module](
4693 https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used:
4695 ```python
4696 # Display edge maps for the first channel (at index 0)
4697 Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show()
4698 Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show()
4699 ```
4701 Args:
4702 image: Image tensor with shape [batch_size, h, w, d] and type float32 or
4703 float64. The image(s) must be 2x2 or larger.
4705 Returns:
4706 Tensor holding edge maps for each channel. Returns a tensor with shape
4707 [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]],
4708 [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter.
4709 """
4710 # Define vertical and horizontal Sobel filters.
4711 static_image_shape = image.get_shape()
4712 image_shape = array_ops.shape(image)
4713 kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]],
4714 [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]]
4715 num_kernels = len(kernels)
4716 kernels = np.transpose(np.asarray(kernels), (1, 2, 0))
4717 kernels = np.expand_dims(kernels, -2)
4718 kernels_tf = constant_op.constant(kernels, dtype=image.dtype)
4720 kernels_tf = array_ops.tile(
4721 kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters')
4723 # Use depth-wise convolution to calculate edge maps per channel.
4724 pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]]
4725 padded = array_ops.pad(image, pad_sizes, mode='REFLECT')
4727 # Output tensor has shape [batch_size, h, w, d * num_kernels].
4728 strides = [1, 1, 1, 1]
4729 output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID')
4731 # Reshape to [batch_size, h, w, d, num_kernels].
4732 shape = array_ops.concat([image_shape, [num_kernels]], 0)
4733 output = array_ops.reshape(output, shape=shape)
4734 output.set_shape(static_image_shape.concatenate([num_kernels]))
4735 return output
4738def resize_bicubic(images,
4739 size,
4740 align_corners=False,
4741 name=None,
4742 half_pixel_centers=False):
4743 return gen_image_ops.resize_bicubic(
4744 images=images,
4745 size=size,
4746 align_corners=align_corners,
4747 half_pixel_centers=half_pixel_centers,
4748 name=name)
4751def resize_bilinear(images,
4752 size,
4753 align_corners=False,
4754 name=None,
4755 half_pixel_centers=False):
4756 return gen_image_ops.resize_bilinear(
4757 images=images,
4758 size=size,
4759 align_corners=align_corners,
4760 half_pixel_centers=half_pixel_centers,
4761 name=name)
4764def resize_nearest_neighbor(images,
4765 size,
4766 align_corners=False,
4767 name=None,
4768 half_pixel_centers=False):
4769 return gen_image_ops.resize_nearest_neighbor(
4770 images=images,
4771 size=size,
4772 align_corners=align_corners,
4773 half_pixel_centers=half_pixel_centers,
4774 name=name)
4777resize_area_deprecation = deprecation.deprecated(
4778 date=None,
4779 instructions=(
4780 'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.'))
4781tf_export(v1=['image.resize_area'])(
4782 resize_area_deprecation(
4783 dispatch.add_dispatch_support(gen_image_ops.resize_area)))
4785resize_bicubic_deprecation = deprecation.deprecated(
4786 date=None,
4787 instructions=(
4788 'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.'))
4789tf_export(v1=['image.resize_bicubic'])(
4790 dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic)))
4792resize_bilinear_deprecation = deprecation.deprecated(
4793 date=None,
4794 instructions=(
4795 'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.'))
4796tf_export(v1=['image.resize_bilinear'])(
4797 dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear)))
4799resize_nearest_neighbor_deprecation = deprecation.deprecated(
4800 date=None,
4801 instructions=(
4802 'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` '
4803 'instead.'))
4804tf_export(v1=['image.resize_nearest_neighbor'])(
4805 dispatch.add_dispatch_support(
4806 resize_nearest_neighbor_deprecation(resize_nearest_neighbor)))
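# For reference, the replacement suggested by the deprecation notices above is
# `tf.image.resize` with an explicit method, e.g. (an illustrative sketch, not
# part of the original module):
#
#   resized = tf.image.resize(images, [new_height, new_width],
#                             method=tf.image.ResizeMethod.BILINEAR)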
4809@tf_export('image.crop_and_resize', v1=[])
4810@dispatch.add_dispatch_support
4811def crop_and_resize_v2(image,
4812 boxes,
4813 box_indices,
4814 crop_size,
4815 method='bilinear',
4816                        extrapolation_value=0.0,
4817 name=None):
4818 """Extracts crops from the input image tensor and resizes them.
4820 Extracts crops from the input image tensor and resizes them using bilinear
4821 sampling or nearest neighbor sampling (possibly with aspect ratio change) to a
4822 common output size specified by `crop_size`. This is more general than the
4823 `crop_to_bounding_box` op which extracts a fixed size slice from the input
4824 image and does not allow resizing or aspect ratio change. The crops occur
4825 first and then the resize.
4827 Returns a tensor with `crops` from the input `image` at positions defined at
4828 the bounding box locations in `boxes`. The cropped boxes are all resized (with
4829 bilinear or nearest neighbor interpolation) to a fixed
4830 `size = [crop_height, crop_width]`. The result is a 4-D tensor
4831 `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
4832 In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
4833 results to using `tf.compat.v1.image.resize_bilinear()` or
4834  `tf.compat.v1.image.resize_nearest_neighbor()` (depending on the `method`
4835 argument) with
4836 `align_corners=True`.
4838 Args:
4839 image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
4840 Both `image_height` and `image_width` need to be positive.
4841 boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
4842 specifies the coordinates of a box in the `box_ind[i]` image and is
4843 specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
4844 coordinate value of `y` is mapped to the image coordinate at `y *
4845      (image_height - 1)`, so the `[0, 1]` interval of normalized image
4846 height is mapped to `[0, image_height - 1]` in image height coordinates.
4847 We do allow `y1` > `y2`, in which case the sampled crop is an up-down
4848 flipped version of the original image. The width dimension is treated
4849 similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
4850 in which case we use `extrapolation_value` to extrapolate the input image
4851 values.
4852 box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
4853 batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box
4854 refers to.
4855 crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
4856 All cropped image patches are resized to this size. The aspect ratio of
4857 the image content is not preserved. Both `crop_height` and `crop_width`
4858 need to be positive.
4859 method: An optional string specifying the sampling method for resizing. It
4860      can be either `"bilinear"` or `"nearest"` and defaults to `"bilinear"`.
4861 Currently two sampling methods are supported: Bilinear and Nearest
4862 Neighbor.
4863 extrapolation_value: An optional `float`. Defaults to `0.0`. Value used for
4864 extrapolation, when applicable.
4865 name: A name for the operation (optional).
4867 Returns:
4868 A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
4870 Usage example:
4872 >>> BATCH_SIZE = 1
4873 >>> NUM_BOXES = 5
4874 >>> IMAGE_HEIGHT = 256
4875 >>> IMAGE_WIDTH = 256
4876 >>> CHANNELS = 3
4877 >>> CROP_SIZE = (24, 24)
4879 >>> image = tf.random.normal(shape=(
4880 ... BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS) )
4881 >>> boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
4882 >>> box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0,
4883 ... maxval=BATCH_SIZE, dtype=tf.int32)
4884 >>> output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
4885 >>> output.shape
4886 TensorShape([5, 24, 24, 3])
4888 Example with linear interpolation:
4890 >>> image = np.arange(0, 18, 2).astype('float32').reshape(3, 3)
4891 >>> result = tf.image.crop_and_resize(
4892 ... image[None, :, :, None],
4893 ... np.asarray([[0.5,0.5,1,1]]), [0], [3, 3], method='bilinear')
4894 >>> result[0][:, :, 0]
4895 <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
4896 array([[ 8., 9., 10.],
4897 [11., 12., 13.],
4898 [14., 15., 16.]], dtype=float32)>
4900 Example with nearest interpolation:
4902 >>> image = np.arange(0, 18, 2).astype('float32').reshape(3, 3)
4903 >>> result = tf.image.crop_and_resize(
4904 ... image[None, :, :, None],
4905 ... np.asarray([[0.5,0.5,1,1]]), [0], [3, 3], method='nearest')
4906 >>> result[0][:, :, 0]
4907 <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
4908 array([[ 8., 10., 10.],
4909 [14., 16., 16.],
4910 [14., 16., 16.]], dtype=float32)>
4913 """
4914 return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size,
4915 method, extrapolation_value, name)
4918@tf_export(v1=['image.crop_and_resize'])
4919@dispatch.add_dispatch_support
4920@deprecation.deprecated_args(None,
4921 'box_ind is deprecated, use box_indices instead',
4922 'box_ind')
4923def crop_and_resize_v1( # pylint: disable=missing-docstring
4924 image,
4925 boxes,
4926 box_ind=None,
4927 crop_size=None,
4928 method='bilinear',
4929 extrapolation_value=0,
4930 name=None,
4931 box_indices=None):
4932 box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices,
4933 'box_ind', box_ind)
4934 return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method,
4935 extrapolation_value, name)
4938crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__
4941@tf_export(v1=['image.extract_glimpse'])
4942@dispatch.add_dispatch_support
4943def extract_glimpse(
4944 input, # pylint: disable=redefined-builtin
4945 size,
4946 offsets,
4947 centered=True,
4948 normalized=True,
4949 uniform_noise=True,
4950 name=None):
4951 """Extracts a glimpse from the input tensor.
4953 Returns a set of windows called glimpses extracted at location
4954  `offsets` from the input tensor. If a window only partially
4955  overlaps the input, the non-overlapping areas will be filled with
4956 random noise.
4958 The result is a 4-D tensor of shape `[batch_size, glimpse_height,
4959 glimpse_width, channels]`. The channels and batch dimensions are the
4960 same as that of the input tensor. The height and width of the output
4961 windows are specified in the `size` parameter.
4963  The arguments `normalized` and `centered` control how the windows are built:
4965 * If the coordinates are normalized but not centered, 0.0 and 1.0
4966 correspond to the minimum and maximum of each height and width
4967 dimension.
4968 * If the coordinates are both normalized and centered, they range from
4969 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
4970 left corner, the lower right corner is located at (1.0, 1.0) and the
4971 center is at (0, 0).
4972 * If the coordinates are not normalized they are interpreted as
4973 numbers of pixels.
4975 Usage Example:
4977 >>> x = [[[[0.0],
4978 ... [1.0],
4979 ... [2.0]],
4980 ... [[3.0],
4981 ... [4.0],
4982 ... [5.0]],
4983 ... [[6.0],
4984 ... [7.0],
4985 ... [8.0]]]]
4986 >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
4987 ... centered=False, normalized=False)
4988 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
4989 array([[[[0.],
4990 [1.]],
4991 [[3.],
4992 [4.]]]], dtype=float32)>
4994 Args:
4995 input: A `Tensor` of type `float32`. A 4-D float tensor of shape
4996 `[batch_size, height, width, channels]`.
4997 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
4998 size of the glimpses to extract. The glimpse height must be specified
4999      first, followed by the glimpse width.
5000    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
5001 `[batch_size, 2]` containing the y, x locations of the center of each
5002 window.
5003    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
5004      coordinates are centered relative to the image, in which case the (0, 0)
5005      offset is relative to the center of the input images. If false, the (0, 0)
5006      offset corresponds to the upper left corner of the input images.
5007    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
5008      coordinates are normalized.
5009    uniform_noise: An optional `bool`. Defaults to `True`. Indicates if the
5010      noise should be generated using a uniform distribution or a Gaussian
5011      distribution.
5012 name: A name for the operation (optional).
5014 Returns:
5015 A `Tensor` of type `float32`.
5016 """
5017 return gen_image_ops.extract_glimpse(
5018 input=input,
5019 size=size,
5020 offsets=offsets,
5021 centered=centered,
5022 normalized=normalized,
5023 uniform_noise=uniform_noise,
5024 name=name)
5027@tf_export('image.extract_glimpse', v1=[])
5028@dispatch.add_dispatch_support
5029def extract_glimpse_v2(
5030 input, # pylint: disable=redefined-builtin
5031 size,
5032 offsets,
5033 centered=True,
5034 normalized=True,
5035 noise='uniform',
5036 name=None):
5037 """Extracts a glimpse from the input tensor.
5039 Returns a set of windows called glimpses extracted at location
5040  `offsets` from the input tensor. If a window only partially
5041  overlaps the input, the non-overlapping areas will be filled with
5042 random noise.
5044 The result is a 4-D tensor of shape `[batch_size, glimpse_height,
5045 glimpse_width, channels]`. The channels and batch dimensions are the
5046 same as that of the input tensor. The height and width of the output
5047 windows are specified in the `size` parameter.
5049  The arguments `normalized` and `centered` control how the windows are built:
5051 * If the coordinates are normalized but not centered, 0.0 and 1.0
5052 correspond to the minimum and maximum of each height and width
5053 dimension.
5054 * If the coordinates are both normalized and centered, they range from
5055 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
5056 left corner, the lower right corner is located at (1.0, 1.0) and the
5057 center is at (0, 0).
5058 * If the coordinates are not normalized they are interpreted as
5059 numbers of pixels.
5061 Usage Example:
5063 >>> x = [[[[0.0],
5064 ... [1.0],
5065 ... [2.0]],
5066 ... [[3.0],
5067 ... [4.0],
5068 ... [5.0]],
5069 ... [[6.0],
5070 ... [7.0],
5071 ... [8.0]]]]
5072 >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
5073 ... centered=False, normalized=False)
5074 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
5075 array([[[[4.],
5076 [5.]],
5077 [[7.],
5078 [8.]]]], dtype=float32)>
5080 Args:
5081 input: A `Tensor` of type `float32`. A 4-D float tensor of shape
5082 `[batch_size, height, width, channels]`.
5083 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
5084 size of the glimpses to extract. The glimpse height must be specified
5085      first, followed by the glimpse width.
5086    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
5087 `[batch_size, 2]` containing the y, x locations of the center of each
5088 window.
5089    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
5090      coordinates are centered relative to the image, in which case the (0, 0)
5091      offset is relative to the center of the input images. If false, the (0, 0)
5092      offset corresponds to the upper left corner of the input images.
5093    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
5094      coordinates are normalized.
5095    noise: An optional `string`. Defaults to `uniform`. Indicates if the noise
5096      should be `uniform` (uniform distribution), `gaussian` (Gaussian
5097      distribution), or `zero` (zero padding).
5098 name: A name for the operation (optional).
5100 Returns:
5101 A `Tensor` of type `float32`.
5102 """
5103 return gen_image_ops.extract_glimpse_v2(
5104 input=input,
5105 size=size,
5106 offsets=offsets,
5107 centered=centered,
5108 normalized=normalized,
5109 noise=noise,
5110 uniform_noise=False,
5111 name=name)
5114@tf_export('image.combined_non_max_suppression')
5115@dispatch.add_dispatch_support
5116def combined_non_max_suppression(boxes,
5117 scores,
5118 max_output_size_per_class,
5119 max_total_size,
5120 iou_threshold=0.5,
5121 score_threshold=float('-inf'),
5122 pad_per_class=False,
5123 clip_boxes=True,
5124 name=None):
5125 """Greedily selects a subset of bounding boxes in descending order of score.
5127 This operation performs non_max_suppression on the inputs per batch, across
5128 all classes.
5129 Prunes away boxes that have high intersection-over-union (IOU) overlap
5130 with previously selected boxes. Bounding boxes are supplied as
5131 [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
5132 diagonal pair of box corners and the coordinates can be provided as normalized
5133 (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm
5134 is agnostic to where the origin is in the coordinate system. Also note that
5135 this algorithm is invariant to orthogonal transformations and translations
5136  of the coordinate system; thus translations or reflections of the coordinate
5137 system result in the same boxes being selected by the algorithm.
5138 The output of this operation is the final boxes, scores and classes tensor
5139 returned after performing non_max_suppression.
5141 Args:
5142 boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
5143 is 1 then same boxes are used for all classes otherwise, if `q` is equal
5144 to number of classes, class-specific boxes are used.
5145 scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
5146 representing a single score corresponding to each box (each row of boxes).
5147 max_output_size_per_class: A scalar integer `Tensor` representing the
5148      maximum number of boxes to be selected by non-max suppression per class.
5149    max_total_size: An int32 scalar representing the maximum number of boxes retained
5150 over all classes. Note that setting this value to a large number may
5151 result in OOM error depending on the system workload.
5152 iou_threshold: A float representing the threshold for deciding whether boxes
5153 overlap too much with respect to IOU.
5154 score_threshold: A float representing the threshold for deciding when to
5155 remove boxes based on score.
5156 pad_per_class: If false, the output nmsed boxes, scores and classes are
5157 padded/clipped to `max_total_size`. If true, the output nmsed boxes,
5158 scores and classes are padded to be of length
5159 `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in
5160 which case it is clipped to `max_total_size`. Defaults to false.
5161 clip_boxes: If true, the coordinates of output nmsed boxes will be clipped
5162      to [0, 1]. If false, output the box coordinates as they are. Defaults to
5163 true.
5164 name: A name for the operation (optional).
5166 Returns:
5167 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
5168 containing the non-max suppressed boxes.
5169 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
5170 the scores for the boxes.
5171 'nmsed_classes': A [batch_size, max_detections] float32 tensor
5172 containing the class for boxes.
5173 'valid_detections': A [batch_size] int32 tensor indicating the number of
5174 valid detections per batch item. Only the top valid_detections[i] entries
5175 in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
5176 entries are zero paddings.
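
  A minimal usage sketch with random inputs (shapes are illustrative
  assumptions; `q=1` means the same boxes are shared across the 3 classes):

  ```python
  boxes = tf.random.uniform([1, 10, 1, 4])
  scores = tf.random.uniform([1, 10, 3])
  nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
      tf.image.combined_non_max_suppression(
          boxes, scores, max_output_size_per_class=5, max_total_size=10))
  ```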
5177 """
5178 with ops.name_scope(name, 'combined_non_max_suppression'):
5179 iou_threshold = ops.convert_to_tensor(
5180 iou_threshold, dtype=dtypes.float32, name='iou_threshold')
5181 score_threshold = ops.convert_to_tensor(
5182 score_threshold, dtype=dtypes.float32, name='score_threshold')
5184 # Convert `max_total_size` to tensor *without* setting the `dtype` param.
5185 # This allows us to catch `int32` overflow case with `max_total_size`
5186 # whose expected dtype is `int32` by the op registration. Any number within
5187 # `int32` will get converted to `int32` tensor. Anything larger will get
5188 # converted to `int64`. Passing in `int64` for `max_total_size` to the op
5189 # will throw dtype mismatch exception.
5190 # TODO(b/173251596): Once there is a more general solution to warn against
5191 # int overflow conversions, revisit this check.
5192 max_total_size = ops.convert_to_tensor(max_total_size)
5194 return gen_image_ops.combined_non_max_suppression(
5195 boxes, scores, max_output_size_per_class, max_total_size, iou_threshold,
5196 score_threshold, pad_per_class, clip_boxes)
5199def _bbox_overlap(boxes_a, boxes_b):
5200   """Calculates the overlap (IoU, intersection over union) between boxes_a and boxes_b.
5202 Args:
5203 boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of
5204 boxes per image. The last dimension is the pixel coordinates in
5205 [ymin, xmin, ymax, xmax] form.
5206 boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of
5207 boxes. The last dimension is the pixel coordinates in
5208 [ymin, xmin, ymax, xmax] form.
5209 Returns:
5210     intersection_over_union: a tensor with a shape of [batch_size, N, M],
5211       representing the ratio of intersection area over union area (IoU) between
5212       the two sets of boxes.
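
     For example (a worked, illustrative case): with the single box
     `[0., 0., 2., 2.]` in boxes_a and `[1., 1., 3., 3.]` in boxes_b, the
     intersection area is 1 and the union area is 4 + 4 - 1 = 7, so the
     returned IoU is approximately 1 / 7 ~= 0.143 (marginally less due to
     EPSILON).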
5213 """
5214 with ops.name_scope('bbox_overlap'):
5215 a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split(
5216 value=boxes_a, num_or_size_splits=4, axis=2)
5217 b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split(
5218 value=boxes_b, num_or_size_splits=4, axis=2)
5220 # Calculates the intersection area.
5221 i_xmin = math_ops.maximum(
5222 a_x_min, array_ops.transpose(b_x_min, [0, 2, 1]))
5223 i_xmax = math_ops.minimum(
5224 a_x_max, array_ops.transpose(b_x_max, [0, 2, 1]))
5225 i_ymin = math_ops.maximum(
5226 a_y_min, array_ops.transpose(b_y_min, [0, 2, 1]))
5227 i_ymax = math_ops.minimum(
5228 a_y_max, array_ops.transpose(b_y_max, [0, 2, 1]))
5229 i_area = math_ops.maximum(
5230 (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0)
5232 # Calculates the union area.
5233 a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min)
5234 b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min)
5235 EPSILON = 1e-8
5236 # Adds a small epsilon to avoid divide-by-zero.
5237 u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON
5239 # Calculates IoU.
5240 intersection_over_union = i_area / u_area
5242 return intersection_over_union
5245def _self_suppression(iou, _, iou_sum, iou_threshold):
5246 """Suppress boxes in the same tile.
5248 Compute boxes that cannot be suppressed by others (i.e.,
5249 can_suppress_others), and then use them to suppress boxes in the same tile.
5251 Args:
5252 iou: a tensor of shape [batch_size, num_boxes_with_padding] representing
5253 intersection over union.
5254     iou_sum: a tensor of shape [batch_size] with the per-image IoU sums.
5255 iou_threshold: a scalar tensor.
5257 Returns:
5258 iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding].
5259     iou_diff: a boolean scalar tensor representing whether any box is
5260       suppressed in this step.
5261     iou_sum_new: a tensor of shape [batch_size] that represents
5262       the IoU sum after suppression.
5263 iou_threshold: a scalar tensor.
5264 """
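  # A note on usage (descriptive, matching the call in _suppression_loop_body):
  # this function serves as a `while_loop` body whose second return value
  # doubles as the loop condition, so iteration stops once no image's IoU sum
  # drops by more than `iou_threshold` in a step.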
5265 batch_size = array_ops.shape(iou)[0]
5266 can_suppress_others = math_ops.cast(
5267 array_ops.reshape(
5268 math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]),
5269 iou.dtype)
5270 iou_after_suppression = array_ops.reshape(
5271 math_ops.cast(
5272 math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold,
5273 iou.dtype),
5274 [batch_size, -1, 1]) * iou
5275 iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2])
5276 return [
5277 iou_after_suppression,
5278 math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new,
5279 iou_threshold
5280 ]
5283def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size):
5284 """Suppress boxes between different tiles.
5286 Args:
5287 boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4]
5288 box_slice: a tensor of shape [batch_size, tile_size, 4]
5289 iou_threshold: a scalar tensor
5290 inner_idx: a scalar tensor representing the tile index of the tile
5291      that is used to suppress box_slice
5292 tile_size: an integer representing the number of boxes in a tile
5294 Returns:
5295     boxes: the input boxes, unchanged.
5296     box_slice_after_suppression: box_slice after suppression.
5297     iou_threshold: unchanged. The incremented tile index, inner_idx + 1, is also returned.
5298 """
5299 batch_size = array_ops.shape(boxes)[0]
5300 new_slice = array_ops.slice(
5301 boxes, [0, inner_idx * tile_size, 0],
5302 [batch_size, tile_size, 4])
5303 iou = _bbox_overlap(new_slice, box_slice)
5304 box_slice_after_suppression = array_ops.expand_dims(
5305 math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]),
5306 box_slice.dtype),
5307 2) * box_slice
5308 return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1
5311def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size):
5312 """Process boxes in the range [idx*tile_size, (idx+1)*tile_size).
5314 Args:
5315 boxes: a tensor with a shape of [batch_size, anchors, 4].
5316 iou_threshold: a float representing the threshold for deciding whether boxes
5317 overlap too much with respect to IOU.
5318     output_size: an int32 tensor of size [batch_size] representing the number
5319       of selected boxes for each batch item.
5320     idx: an integer scalar representing the induction variable.
5321 tile_size: an integer representing the number of boxes in a tile
5323 Returns:
5324 boxes: updated boxes.
5325 iou_threshold: pass down iou_threshold to the next iteration.
5326 output_size: the updated output_size.
5327 idx: the updated induction variable.
5328 """
5329 with ops.name_scope('suppression_loop_body'):
5330 num_tiles = array_ops.shape(boxes)[1] // tile_size
5331 batch_size = array_ops.shape(boxes)[0]
5333 def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx):
5334 return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx,
5335 tile_size)
5337 # Iterates over tiles that can possibly suppress the current tile.
5338 box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0],
5339 [batch_size, tile_size, 4])
5340 _, box_slice, _, _ = while_loop.while_loop(
5341 lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
5342 cross_suppression_func,
5343 [boxes, box_slice, iou_threshold,
5344 constant_op.constant(0)])
5346 # Iterates over the current tile to compute self-suppression.
5347 iou = _bbox_overlap(box_slice, box_slice)
5348 mask = array_ops.expand_dims(
5349 array_ops.reshape(
5350 math_ops.range(tile_size), [1, -1]) > array_ops.reshape(
5351 math_ops.range(tile_size), [-1, 1]), 0)
5352 iou *= math_ops.cast(
5353 math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype)
5354 suppressed_iou, _, _, _ = while_loop.while_loop(
5355 lambda _iou, loop_condition, _iou_sum, _: loop_condition,
5356 _self_suppression, [
5357 iou,
5358 constant_op.constant(True),
5359 math_ops.reduce_sum(iou, [1, 2]), iou_threshold
5360 ])
5361 suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0
5362 box_slice *= array_ops.expand_dims(
5363 1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2)
5365 # Uses box_slice to update the input boxes.
5366 mask = array_ops.reshape(
5367 math_ops.cast(
5368 math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype),
5369 [1, -1, 1, 1])
5370 boxes = array_ops.tile(array_ops.expand_dims(
5371 box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape(
5372 boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask)
5373 boxes = array_ops.reshape(boxes, [batch_size, -1, 4])
5375 # Updates output_size.
5376 output_size += math_ops.reduce_sum(
5377 math_ops.cast(
5378 math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1])
5379 return boxes, iou_threshold, output_size, idx + 1
5382@tf_export('image.non_max_suppression_padded')
5383@dispatch.add_dispatch_support
5384def non_max_suppression_padded(boxes,
5385 scores,
5386 max_output_size,
5387 iou_threshold=0.5,
5388 score_threshold=float('-inf'),
5389 pad_to_max_output_size=False,
5390 name=None,
5391 sorted_input=False,
5392 canonicalized_coordinates=False,
5393 tile_size=512):
5394 """Greedily selects a subset of bounding boxes in descending order of score.
5396  Performs an operation algorithmically equivalent to tf.image.non_max_suppression,
5397 with the addition of an optional parameter which zero-pads the output to
5398 be of size `max_output_size`.
5399 The output of this operation is a tuple containing the set of integers
5400 indexing into the input collection of bounding boxes representing the selected
5401 boxes and the number of valid indices in the index set. The bounding box
5402 coordinates corresponding to the selected indices can then be obtained using
5403 the `tf.slice` and `tf.gather` operations. For example:
5404 ```python
5405 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5406 boxes, scores, max_output_size, iou_threshold,
5407 score_threshold, pad_to_max_output_size=True)
5408 selected_indices = tf.slice(
5409 selected_indices_padded, tf.constant([0]), num_valid)
5410 selected_boxes = tf.gather(boxes, selected_indices)
5411 ```
5413 Args:
5414 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5415 Dimensions except the last two are batch dimensions.
5416 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5417 max_output_size: a scalar integer `Tensor` representing the maximum number
5418 of boxes to be selected by non max suppression. Note that setting this
5419 value to a large number may result in OOM error depending on the system
5420 workload.
5421 iou_threshold: a float representing the threshold for deciding whether boxes
5422 overlap too much with respect to IoU (intersection over union).
5423 score_threshold: a float representing the threshold for box scores. Boxes
5424 with a score that is not larger than this threshold will be suppressed.
5425 pad_to_max_output_size: whether to pad the output idx to max_output_size.
5426 Must be set to True when the input is a batch of images.
5427 name: name of operation.
5428 sorted_input: a boolean indicating whether the input boxes and scores
5429 are sorted in descending order by the score.
5430 canonicalized_coordinates: if box coordinates are given as
5431      `[y_min, x_min, y_max, x_max]`, setting to True eliminates redundant
5432 computation to canonicalize box coordinates.
5433 tile_size: an integer representing the number of boxes in a tile, i.e.,
5434 the maximum number of boxes per image that can be used to suppress other
5435 boxes in parallel; larger tile_size means larger parallelism and
5436 potentially more redundant work.
5437 Returns:
5438 idx: a tensor with a shape of [..., num_boxes] representing the
5439 indices selected by non-max suppression. The leading dimensions
5440 are the batch dimensions of the input boxes. All numbers are within
5441 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5442 indices (i.e., idx[i][:num_valid[i]]) are valid.
5443 num_valid: a tensor of rank 0 or higher with a shape of [...]
5444 representing the number of valid indices in idx. Its dimensions are the
5445 batch dimensions of the input boxes.
5446 Raises:
5447     ValueError: When `pad_to_max_output_size` is set to False for batched input.
5448 """
5449 with ops.name_scope(name, 'non_max_suppression_padded'):
5450 if not pad_to_max_output_size:
5451 # pad_to_max_output_size may be set to False only when the shape of
5452 # boxes is [num_boxes, 4], i.e., a single image. We make best effort to
5453 # detect violations at compile time. If `boxes` does not have a static
5454 # rank, the check allows computation to proceed.
5455 if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2:
5456 raise ValueError("'pad_to_max_output_size' (value {}) must be True for "
5457 'batched input'.format(pad_to_max_output_size))
5458 if name is None:
5459 name = ''
5460 idx, num_valid = non_max_suppression_padded_v2(
5461 boxes, scores, max_output_size, iou_threshold, score_threshold,
5462 sorted_input, canonicalized_coordinates, tile_size)
5463 # def_function.function seems to lose shape information, so set it here.
5464 if not pad_to_max_output_size:
5465 idx = idx[0, :num_valid]
5466 else:
5467 batch_dims = array_ops.concat([
5468 array_ops.shape(boxes)[:-2],
5469 array_ops.expand_dims(max_output_size, 0)
5470 ], 0)
5471 idx = array_ops.reshape(idx, batch_dims)
5472 return idx, num_valid
5475# TODO(b/158709815): Improve performance regression due to
5476# def_function.function.
5477@def_function.function(
5478 experimental_implements='non_max_suppression_padded_v2')
5479def non_max_suppression_padded_v2(boxes,
5480 scores,
5481 max_output_size,
5482 iou_threshold=0.5,
5483 score_threshold=float('-inf'),
5484 sorted_input=False,
5485 canonicalized_coordinates=False,
5486 tile_size=512):
5487 """Non-maximum suppression.
5489 Prunes away boxes that have high intersection-over-union (IOU) overlap
5490 with previously selected boxes. Bounding boxes are supplied as
5491 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
5492 diagonal pair of box corners and the coordinates can be provided as normalized
5493 (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box
5494  coordinates are canonicalized to `[y_min, x_min, y_max, x_max]`,
5495  where `(y_min, x_min)` and `(y_max, x_max)` are the coordinates of the lower
5496  left and upper right corner. The caller may indicate that the input box
5497  coordinates are already canonicalized, to eliminate redundant work, by setting
5498  canonicalized_coordinates to `True`. Note that this algorithm is agnostic to
5499  where the origin is in the coordinate system. Note that this algorithm is
5500  invariant to orthogonal transformations and translations of the coordinate
5501  system; thus translations or reflections of the coordinate system result in
5502  the same boxes being selected by the algorithm.
5504 Similar to tf.image.non_max_suppression, non_max_suppression_padded
5505 implements hard NMS but can operate on a batch of images and improves
5506  performance by tiling the bounding boxes. non_max_suppression_padded should
5507  be preferred over tf.image.non_max_suppression when running on devices with
5508  abundant parallelism for higher computation speed. For soft NMS, refer to
5509 tf.image.non_max_suppression_with_scores.
5511 While a serial NMS algorithm iteratively uses the highest-scored unprocessed
5512 box to suppress boxes, this algorithm uses many boxes to suppress other boxes
5513 in parallel. The key idea is to partition boxes into tiles based on their
5514  score and to suppress boxes tile by tile, thus achieving parallelism within a
5515 tile. The tile size determines the degree of parallelism.
5517 In cross suppression (using boxes of tile A to suppress boxes of tile B),
5518 all boxes in A can independently suppress boxes in B.
5520 Self suppression (suppressing boxes of the same tile) needs to be iteratively
5521 applied until there's no more suppression. In each iteration, boxes that
5522 cannot be suppressed are used to suppress boxes in the same tile.
5524    boxes = boxes.pad_to_multiple_of(tile_size)
5525 num_tiles = len(boxes) // tile_size
5526 output_boxes = []
5527 for i in range(num_tiles):
5528 box_tile = boxes[i*tile_size : (i+1)*tile_size]
5529      for j in range(i):
5530        # in parallel suppress boxes in box_tile using boxes from suppressing_tile
5531        suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
5532        iou = _bbox_overlap(box_tile, suppressing_tile)
5533        # if a box is suppressed in iou, zero it out
5534        box_tile *= _update_boxes(iou)
5535      # Iteratively handle the diagonal tile.
5536      iou = _bbox_overlap(box_tile, box_tile)
5537 iou_changed = True
5538 while iou_changed:
5539 # boxes that are not suppressed by anything else
5540 suppressing_boxes = _get_suppressing_boxes(iou)
5541 # boxes that are suppressed by suppressing_boxes
5542 suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
5543 # clear iou to 0 for boxes that are suppressed, as they cannot be used
5544 # to suppress other boxes any more
5545 new_iou = _clear_iou(iou, suppressed_boxes)
5546 iou_changed = (new_iou != iou)
5547 iou = new_iou
5548 # remaining boxes that can still suppress others, are selected boxes.
5549 output_boxes.append(_get_suppressing_boxes(iou))
5550 if len(output_boxes) >= max_output_size:
5551 break
5553 Args:
5554 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5555 Dimensions except the last two are batch dimensions. The last dimension
5556 represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates
5557 on each dimension can be given in any order
5558 (see also `canonicalized_coordinates`) but must describe a box with
5559 a positive area.
5560 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5561 max_output_size: a scalar integer `Tensor` representing the maximum number
5562 of boxes to be selected by non max suppression.
5563 iou_threshold: a float representing the threshold for deciding whether boxes
5564 overlap too much with respect to IoU (intersection over union).
5565 score_threshold: a float representing the threshold for box scores. Boxes
5566 with a score that is not larger than this threshold will be suppressed.
5567 sorted_input: a boolean indicating whether the input boxes and scores
5568 are sorted in descending order by the score.
5569 canonicalized_coordinates: if box coordinates are given as
5570      `[y_min, x_min, y_max, x_max]`, setting to True eliminates redundant
5571 computation to canonicalize box coordinates.
5572 tile_size: an integer representing the number of boxes in a tile, i.e.,
5573 the maximum number of boxes per image that can be used to suppress other
5574 boxes in parallel; larger tile_size means larger parallelism and
5575 potentially more redundant work.
5576 Returns:
5577 idx: a tensor with a shape of [..., num_boxes] representing the
5578 indices selected by non-max suppression. The leading dimensions
5579 are the batch dimensions of the input boxes. All numbers are within
5580 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5581 indices (i.e., idx[i][:num_valid[i]]) are valid.
5582 num_valid: a tensor of rank 0 or higher with a shape of [...]
5583 representing the number of valid indices in idx. Its dimensions are the
5584 batch dimensions of the input boxes.
5585 Raises:
5586     ValueError: When `pad_to_max_output_size` is set to False for batched input.
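
  A minimal batched usage sketch (illustrative; the boxes are constructed so
  they are already canonicalized with positive area):

  ```python
  y1 = tf.random.uniform([2, 100, 1])
  x1 = tf.random.uniform([2, 100, 1])
  boxes = tf.concat([y1, x1, y1 + 0.1, x1 + 0.1], axis=-1)
  scores = tf.random.uniform([2, 100])
  idx, num_valid = non_max_suppression_padded_v2(
      boxes, scores, max_output_size=10, canonicalized_coordinates=True)
  ```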
5587 """
5588 def _sort_scores_and_boxes(scores, boxes):
5589    """Sort boxes based on their score from highest to lowest.
5591 Args:
5592 scores: a tensor with a shape of [batch_size, num_boxes] representing
5593 the scores of boxes.
5594 boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing
5595 the boxes.
5596 Returns:
5597 sorted_scores: a tensor with a shape of [batch_size, num_boxes]
5598 representing the sorted scores.
5599 sorted_boxes: a tensor representing the sorted boxes.
5600 sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes]
5601 representing the index of the scores in a sorted descending order.
5602 """
5603 with ops.name_scope('sort_scores_and_boxes'):
5604 sorted_scores_indices = sort_ops.argsort(
5605 scores, axis=1, direction='DESCENDING')
5606 sorted_scores = array_ops.gather(
5607 scores, sorted_scores_indices, axis=1, batch_dims=1
5608 )
5609 sorted_boxes = array_ops.gather(
5610 boxes, sorted_scores_indices, axis=1, batch_dims=1
5611 )
5612 return sorted_scores, sorted_boxes, sorted_scores_indices
5614 batch_dims = array_ops.shape(boxes)[:-2]
5615 num_boxes = array_ops.shape(boxes)[-2]
5616 boxes = array_ops.reshape(boxes, [-1, num_boxes, 4])
5617 scores = array_ops.reshape(scores, [-1, num_boxes])
5618 batch_size = array_ops.shape(boxes)[0]
5619 if score_threshold != float('-inf'):
5620 with ops.name_scope('filter_by_score'):
5621 score_mask = math_ops.cast(scores > score_threshold, scores.dtype)
5622 scores *= score_mask
5623 box_mask = array_ops.expand_dims(
5624 math_ops.cast(score_mask, boxes.dtype), 2)
5625 boxes *= box_mask
5627 if not canonicalized_coordinates:
5628 with ops.name_scope('canonicalize_coordinates'):
5629 y_1, x_1, y_2, x_2 = array_ops.split(
5630 value=boxes, num_or_size_splits=4, axis=2)
5631 y_1_is_min = math_ops.reduce_all(
5632 math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0]))
5633 y_min, y_max = tf_cond.cond(
5634 y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1))
5635 x_1_is_min = math_ops.reduce_all(
5636 math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0]))
5637 x_min, x_max = tf_cond.cond(
5638 x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1))
5639 boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2)
5640 # TODO(@bhack): https://github.com/tensorflow/tensorflow/issues/56089
5641 # this will be required after deprecation
5642 #else:
5643 # y_1, x_1, y_2, x_2 = array_ops.split(
5644 # value=boxes, num_or_size_splits=4, axis=2)
5646 if not sorted_input:
5647 scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes)
5648 else:
5649 # Default value required for Autograph.
5650 sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32)
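  # Pad the number of boxes up to the next multiple of `tile_size` (and to at
  # least `max_output_size`); e.g. 100 input boxes with tile_size=512 pad the
  # box dimension to 512.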
5652 pad = math_ops.cast(
5653 math_ops.ceil(
5654 math_ops.cast(
5655 math_ops.maximum(num_boxes, max_output_size), dtypes.float32) /
5656 math_ops.cast(tile_size, dtypes.float32)),
5657 dtypes.int32) * tile_size - num_boxes
5658 boxes = array_ops.pad(
5659 math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]])
5660 scores = array_ops.pad(
5661 math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]])
5662 num_boxes_after_padding = num_boxes + pad
5663 num_iterations = num_boxes_after_padding // tile_size
5664 def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
5665 return math_ops.logical_and(
5666 math_ops.reduce_min(output_size) < max_output_size,
5667 idx < num_iterations)
5669 def suppression_loop_body(boxes, iou_threshold, output_size, idx):
5670 return _suppression_loop_body(
5671 boxes, iou_threshold, output_size, idx, tile_size)
5673 selected_boxes, _, output_size, _ = while_loop.while_loop(
5674 _loop_cond,
5675 suppression_loop_body,
5676 [
5677 boxes, iou_threshold,
5678 array_ops.zeros([batch_size], dtypes.int32),
5679 constant_op.constant(0)
5680 ],
5681 shape_invariants=[
5682 tensor_shape.TensorShape([None, None, 4]),
5683 tensor_shape.TensorShape([]),
5684 tensor_shape.TensorShape([None]),
5685 tensor_shape.TensorShape([]),
5686 ],
5687 )
5688 num_valid = math_ops.minimum(output_size, max_output_size)
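  # Recover the indices of surviving boxes: a padded box "survives" when any
  # of its coordinates is still non-zero. Multiplying that 0/1 mask by a
  # descending ramp and taking top_k picks out the earliest surviving
  # positions; subtracting from num_boxes_after_padding converts the top_k
  # values back to indices.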
5689 idx = num_boxes_after_padding - math_ops.cast(
5690 nn_ops.top_k(
5691 math_ops.cast(math_ops.reduce_any(
5692 selected_boxes > 0, [2]), dtypes.int32) *
5693 array_ops.expand_dims(
5694 math_ops.range(num_boxes_after_padding, 0, -1), 0),
5695 max_output_size)[0], dtypes.int32)
5696 idx = math_ops.minimum(idx, num_boxes - 1)
5698 if not sorted_input:
5699 index_offsets = math_ops.range(batch_size) * num_boxes
5700 gather_idx = array_ops.reshape(
5701 idx + array_ops.expand_dims(index_offsets, 1), [-1])
5702 idx = array_ops.reshape(
5703 array_ops.gather(array_ops.reshape(sorted_indices, [-1]),
5704 gather_idx),
5705 [batch_size, -1])
5706 invalid_index = array_ops.zeros([batch_size, max_output_size],
5707 dtype=dtypes.int32)
5708 idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0)
5709 num_valid_expanded = array_ops.expand_dims(num_valid, 1)
5710 idx = array_ops.where(idx_index < num_valid_expanded,
5711 idx, invalid_index)
5713 num_valid = array_ops.reshape(num_valid, batch_dims)
5714 return idx, num_valid
5717def non_max_suppression_padded_v1(boxes,
5718 scores,
5719 max_output_size,
5720 iou_threshold=0.5,
5721 score_threshold=float('-inf'),
5722 pad_to_max_output_size=False,
5723 name=None):
5724 """Greedily selects a subset of bounding boxes in descending order of score.
5726  Performs an operation algorithmically equivalent to tf.image.non_max_suppression,
5727 with the addition of an optional parameter which zero-pads the output to
5728 be of size `max_output_size`.
5729 The output of this operation is a tuple containing the set of integers
5730 indexing into the input collection of bounding boxes representing the selected
5731 boxes and the number of valid indices in the index set. The bounding box
5732 coordinates corresponding to the selected indices can then be obtained using
5733 the `tf.slice` and `tf.gather` operations. For example:
5734 ```python
5735 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5736 boxes, scores, max_output_size, iou_threshold,
5737 score_threshold, pad_to_max_output_size=True)
5738 selected_indices = tf.slice(
5739 selected_indices_padded, tf.constant([0]), num_valid)
5740 selected_boxes = tf.gather(boxes, selected_indices)
5741 ```
5743 Args:
5744 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
5745 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
5746 score corresponding to each box (each row of boxes).
5747 max_output_size: A scalar integer `Tensor` representing the maximum number
5748 of boxes to be selected by non-max suppression.
5749 iou_threshold: A float representing the threshold for deciding whether boxes
5750 overlap too much with respect to IOU.
5751 score_threshold: A float representing the threshold for deciding when to
5752 remove boxes based on score.
5753 pad_to_max_output_size: bool. If True, size of `selected_indices` output is
5754 padded to `max_output_size`.
5755 name: A name for the operation (optional).
5757 Returns:
5758 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
5759 selected indices from the boxes tensor, where `M <= max_output_size`.
5760 valid_outputs: A scalar integer `Tensor` denoting how many elements in
5761 `selected_indices` are valid. Valid elements occur first, then padding.
5762 """
5763 with ops.name_scope(name, 'non_max_suppression_padded'):
5764 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
5765 score_threshold = ops.convert_to_tensor(
5766 score_threshold, name='score_threshold')
5767 return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size,
5768 iou_threshold, score_threshold,
5769 pad_to_max_output_size)
5772@tf_export('image.draw_bounding_boxes', v1=[])
5773@dispatch.add_dispatch_support
5774def draw_bounding_boxes_v2(images, boxes, colors, name=None):
5775 """Draw bounding boxes on a batch of images.
5777 Outputs a copy of `images` but draws on top of the pixels zero or more
5778  bounding boxes specified by the locations in `boxes`. The coordinates of
5779  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5780 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5781 and the height of the underlying image.
5783 For example, if an image is 100 x 200 pixels (height x width) and the bounding
5784 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5785 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5787 Parts of the bounding box may fall outside the image.
5789 Args:
5790 images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5791 4-D with shape `[batch, height, width, depth]`. A batch of images.
5792 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5793 num_bounding_boxes, 4]` containing bounding boxes.
5794 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5795 through for the boxes.
5796 name: A name for the operation (optional).
5798 Returns:
5799 A `Tensor`. Has the same type as `images`.
5801 Usage Example:
5803 >>> # create an empty image
5804 >>> img = tf.zeros([1, 3, 3, 3])
5805 >>> # draw a box around the image
5806 >>> box = np.array([0, 0, 1, 1])
5807 >>> boxes = box.reshape([1, 1, 4])
5808 >>> # alternate between red and blue
5809 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5810 >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5811 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5812 array([[[[1., 0., 0.],
5813 [1., 0., 0.],
5814 [1., 0., 0.]],
5815 [[1., 0., 0.],
5816 [0., 0., 0.],
5817 [1., 0., 0.]],
5818 [[1., 0., 0.],
5819 [1., 0., 0.],
5820 [1., 0., 0.]]]], dtype=float32)>
5821 """
5822 if colors is None:
5823 return gen_image_ops.draw_bounding_boxes(images, boxes, name)
5824 return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name)
5827@tf_export(v1=['image.draw_bounding_boxes'])
5828@dispatch.add_dispatch_support
5829def draw_bounding_boxes(images, boxes, name=None, colors=None):
5830 """Draw bounding boxes on a batch of images.
5832 Outputs a copy of `images` but draws on top of the pixels zero or more
5833  bounding boxes specified by the locations in `boxes`. The coordinates of
5834  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5835 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5836 and the height of the underlying image.
5838 For example, if an image is 100 x 200 pixels (height x width) and the bounding
5839 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5840 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5842 Parts of the bounding box may fall outside the image.
5844 Args:
5845 images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5846 4-D with shape `[batch, height, width, depth]`. A batch of images.
5847 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5848 num_bounding_boxes, 4]` containing bounding boxes.
5849 name: A name for the operation (optional).
5850 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5851 through for the boxes.
5853 Returns:
5854 A `Tensor`. Has the same type as `images`.
5856 Usage Example:
5858 >>> # create an empty image
5859 >>> img = tf.zeros([1, 3, 3, 3])
5860 >>> # draw a box around the image
5861 >>> box = np.array([0, 0, 1, 1])
5862 >>> boxes = box.reshape([1, 1, 4])
5863 >>> # alternate between red and blue
5864 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5865 >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5866 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5867 array([[[[1., 0., 0.],
5868 [1., 0., 0.],
5869 [1., 0., 0.]],
5870 [[1., 0., 0.],
5871 [0., 0., 0.],
5872 [1., 0., 0.]],
5873 [[1., 0., 0.],
5874 [1., 0., 0.],
5875 [1., 0., 0.]]]], dtype=float32)>
5876 """
5877 return draw_bounding_boxes_v2(images, boxes, colors, name)
5880@tf_export('image.generate_bounding_box_proposals')
5881@dispatch.add_dispatch_support
5882def generate_bounding_box_proposals(scores,
5883 bbox_deltas,
5884 image_info,
5885 anchors,
5886 nms_threshold=0.7,
5887 pre_nms_topn=6000,
5888 min_size=16,
5889 post_nms_topn=300,
5890 name=None):
5891 """Generate bounding box proposals from encoded bounding boxes.
5893 Args:
5894 scores: A 4-D float `Tensor` of shape
5895      `[num_images, height, width, num_anchors]` containing scores of
5896 the boxes for given anchors, can be unsorted.
5897 bbox_deltas: A 4-D float `Tensor` of shape
5898 `[num_images, height, width, 4 x num_anchors]` encoding boxes
5899 with respect to each anchor. Coordinates are given
5900 in the form `[dy, dx, dh, dw]`.
5901 image_info: A 2-D float `Tensor` of shape `[num_images, 5]`
5902      containing image information: height, width, and scale.
5903 anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]`
5904 describing the anchor boxes.
5905 Boxes are formatted in the form `[y1, x1, y2, x2]`.
5906    nms_threshold: A scalar float `Tensor` for the non-maximum-suppression
5907 threshold. Defaults to 0.7.
5908 pre_nms_topn: A scalar int `Tensor` for the number of
5909 top scoring boxes to be used as input. Defaults to 6000.
5910 min_size: A scalar float `Tensor`. Any box that has a smaller size
5911 than min_size will be discarded. Defaults to 16.
5912 post_nms_topn: An integer. Maximum number of rois in the output.
5913 name: A name for this operation (optional).
5915 Returns:
5916 rois: Region of interest boxes sorted by their scores.
5917    roi_probabilities: Scores of the ROI boxes in the `rois` tensor.
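
  A shape-level sketch (all values here are illustrative assumptions; the
  layout of the trailing `image_info` entries is a guess, and the underlying
  kernel may only be registered for GPU devices):

  ```python
  num_images, h, w, num_anchors = 1, 32, 32, 3
  scores = tf.random.uniform([num_images, h, w, num_anchors])
  bbox_deltas = tf.random.uniform([num_images, h, w, 4 * num_anchors])
  image_info = tf.random.uniform([num_images, 5])
  anchors = tf.random.uniform([num_anchors, 4])
  rois, roi_probabilities = tf.image.generate_bounding_box_proposals(
      scores, bbox_deltas, image_info, anchors)
  ```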
5918 """
5919 return gen_image_ops.generate_bounding_box_proposals(
5920 scores=scores,
5921 bbox_deltas=bbox_deltas,
5922 image_info=image_info,
5923 anchors=anchors,
5924 nms_threshold=nms_threshold,
5925 pre_nms_topn=pre_nms_topn,
5926 min_size=min_size,
5927 post_nms_topn=post_nms_topn,
5928 name=name)