# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of image ops."""

import functools

import numpy as np

from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.framework import config
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import array_ops_stack
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import cond as tf_cond
from tensorflow.python.ops import control_flow_assert
from tensorflow.python.ops import control_flow_case
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import sort_ops
from tensorflow.python.ops import stateless_random_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops import variables
from tensorflow.python.ops import while_loop
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export

ops.NotDifferentiable('RandomCrop')
# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable('HSVToRGB')
ops.NotDifferentiable('DrawBoundingBoxes')
ops.NotDifferentiable('SampleDistortedBoundingBox')
ops.NotDifferentiable('SampleDistortedBoundingBoxV2')
# TODO(bsteiner): Implement the gradient function for extract_glimpse
# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable('ExtractGlimpse')
ops.NotDifferentiable('NonMaxSuppression')
ops.NotDifferentiable('NonMaxSuppressionV2')
ops.NotDifferentiable('NonMaxSuppressionWithOverlaps')
ops.NotDifferentiable('GenerateBoundingBoxProposals')


# pylint: disable=invalid-name
def _assert(cond, ex_type, msg):
  """A polymorphic assert that works with tensors and boolean expressions.

  If `cond` is not a tensor, behaves like an ordinary assert statement, except
  that an empty list is returned. If `cond` is a tensor, returns a list
  containing a single TensorFlow assert op.

  Args:
    cond: Something that evaluates to a boolean value. May be a tensor.
    ex_type: The exception class to use.
    msg: The error message.

  Returns:
    A list containing at most one assert op.
  """
  if _is_tensor(cond):
    return [control_flow_assert.Assert(cond, [msg])]
  else:
    if not cond:
      raise ex_type(msg)
    else:
      return []
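

# Illustrative sketch (not part of the original module): `_assert` lets the
# same validation code serve both the static and the graph/dynamic path. With
# a Python bool it raises immediately; with a tensor it returns a runtime
# Assert op that callers attach via `with_dependencies`, e.g.:
#
#   deps = _assert(offset_height >= 0, ValueError,
#                  'offset_height must be >= 0')
#   image = control_flow_ops.with_dependencies(deps, image)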


def _is_tensor(x):
  """Returns `True` if `x` is a symbolic tensor-like object.

  Args:
    x: A python object to check.

  Returns:
    `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`.
  """
  return isinstance(x, (ops.Tensor, variables.Variable))


def _ImageDimensions(image, rank):
  """Returns the dimensions of an image tensor.

  Args:
    image: A rank-D Tensor. For 3-D, of shape `[height, width, channels]`.
    rank: The expected rank of the image.

  Returns:
    A list corresponding to the dimensions of the input image. Dimensions that
    are statically known are python integers, otherwise, they are integer
    scalar tensors.
  """
  if image.get_shape().is_fully_defined():
    return image.get_shape().as_list()
  else:
    static_shape = image.get_shape().with_rank(rank).as_list()
    dynamic_shape = array_ops_stack.unstack(array_ops.shape(image), rank)
    return [
        s if s is not None else d for s, d in zip(static_shape, dynamic_shape)
    ]
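

# Illustrative sketch (hypothetical shapes, not part of the original module):
# for an image whose height is static but whose width is only known at run
# time, `_ImageDimensions` mixes python ints with scalar tensors:
#
#   # image has static shape [128, None, 3]
#   height, width, channels = _ImageDimensions(image, rank=3)
#   # height == 128 (int), width is a scalar int32 Tensor, channels == 3 (int)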


def _Check3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.

  Args:
    image: 3-D Tensor of shape [height, width, channels]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    image_shape = image.get_shape().with_rank(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be three-dimensional." %
                     image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' (shape %s) must be fully defined." % image_shape)
  if any(x == 0 for x in image_shape):
    raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape)
  if not image_shape.is_fully_defined():
    return [
        check_ops.assert_positive(
            array_ops.shape(image),
            ["all dims of 'image.shape' "
             'must be > 0.'])
    ]
  else:
    return []


def _Assert3DImage(image):
  """Assert that we are working with a properly shaped image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: 3-D Tensor of shape [height, width, channels]

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)


def _AssertAtLeast3DImage(image):
  """Assert that we are working with a properly shaped image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]

  Raises:
    ValueError: if image.shape is not a [>= 3] vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _CheckAtLeast3DImage(image, require_static=False), image)


def _CheckAtLeast3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if image.shape is not a [>= 3] vector.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    if image.get_shape().ndims is None:
      image_shape = image.get_shape().with_rank(3)
    else:
      image_shape = image.get_shape().with_rank_at_least(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be at least three-dimensional." %
                     image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' must be fully defined.")
  if any(x == 0 for x in image_shape[-3:]):
    raise ValueError("inner 3 dims of 'image.shape' must be > 0: %s" %
                     image_shape)
  if not image_shape[-3:].is_fully_defined():
    return [
        check_ops.assert_positive(
            array_ops.shape(image)[-3:],
            ["inner 3 dims of 'image.shape' "
             'must be > 0.']),
        check_ops.assert_greater_equal(
            array_ops.rank(image),
            3,
            message="'image' must be at least three-dimensional.")
    ]
  else:
    return []
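

# Illustrative sketch (not part of the original module): when the inner dims
# are static, the checks above raise eagerly at graph-construction time; when
# they are unknown, the caller receives runtime assert ops instead:
#
#   ops_list = _CheckAtLeast3DImage(image, require_static=False)
#   image = control_flow_ops.with_dependencies(ops_list, image)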


def _AssertGrayscaleImage(image):
  """Assert that we are working with a properly shaped grayscale image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: >= 2-D Tensor of size [*, 1]

  Raises:
    ValueError: if image.shape is not a [>= 2] vector or if
      last dimension is not size 1.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _CheckGrayscaleImage(image, require_static=False), image)


def _CheckGrayscaleImage(image, require_static=True):
  """Assert that we are working with a properly shaped grayscale image.

  Args:
    image: >= 2-D Tensor of size [*, 1]
    require_static: Boolean, whether static shape is required.

  Raises:
    ValueError: if image.shape is not a [>= 2] vector or if
      last dimension is not size 1.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    if image.get_shape().ndims is None:
      image_shape = image.get_shape().with_rank(2)
    else:
      image_shape = image.get_shape().with_rank_at_least(2)
  except ValueError:
    raise ValueError('A grayscale image (shape %s) must be at least '
                     'two-dimensional.' % image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' must be fully defined.")
  if image_shape.is_fully_defined():
    if image_shape[-1] != 1:
      raise ValueError('Last dimension of a grayscale image should be size 1.')
  if not image_shape.is_fully_defined():
    return [
        check_ops.assert_equal(
            array_ops.shape(image)[-1],
            1,
            message='Last dimension of a grayscale image should be size 1.'),
        check_ops.assert_greater_equal(
            array_ops.rank(image),
            2,
            message='A grayscale image must be at least two-dimensional.')
    ]
  else:
    return []


def fix_image_flip_shape(image, result):
  """Sets the shape to 3 dimensional if we don't know anything else.

  Args:
    image: The original image tensor.
    result: The flipped or transformed image tensor.

  Returns:
    An image whose shape is at least (None, None, None).
  """
  image_shape = image.get_shape()
  if image_shape == tensor_shape.unknown_shape():
    result.set_shape([None, None, None])
  else:
    result.set_shape(image_shape)
  return result


@tf_export('image.random_flip_up_down')
@dispatch.add_dispatch_support
def random_flip_up_down(image, seed=None):
  """Randomly flips an image vertically (upside down).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the first
  dimension, which is `height`. Otherwise, outputs the image as-is.
  When passing a batch of images, each image will be randomly flipped
  independently of the other images.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_up_down(image, 3).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ...     [
  ...         [[[1], [2]], [[3], [4]]],
  ...         [[[5], [6]], [[7], [8]]]
  ...     ])
  >>> tf.image.random_flip_up_down(images, 4).numpy().tolist()
  [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]]

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. tf.random.set_seed).

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  random_func = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(image, 0, random_func, 'random_flip_up_down')


@tf_export('image.random_flip_left_right')
@dispatch.add_dispatch_support
def random_flip_left_right(image, seed=None):
  """Randomly flip an image horizontally (left to right).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the
  second dimension, which is `width`. Otherwise outputs the image as-is.
  When passing a batch of images, each image will be randomly flipped
  independently of the other images.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_left_right(image, 5).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ...     [
  ...         [[[1], [2]], [[3], [4]]],
  ...         [[[5], [6]], [[7], [8]]]
  ...     ])
  >>> tf.image.random_flip_left_right(images, 6).numpy().tolist()
  [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]]

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. tf.random.set_seed).

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  random_func = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(image, 1, random_func, 'random_flip_left_right')


@tf_export('image.stateless_random_flip_left_right', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_left_right(image, seed):
  """Randomly flip an image horizontally (left to right) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  random_func = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image, 1, random_func, 'stateless_random_flip_left_right')


@tf_export('image.stateless_random_flip_up_down', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_up_down(image, seed):
  """Randomly flip an image vertically (upside down) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  random_func = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image, 0, random_func, 'stateless_random_flip_up_down')


def _random_flip(image, flip_index, random_func, scope_name):
  """Randomly (50% chance) flip an image along axis `flip_index`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: Dimension along which to flip the image.
      Vertical is 0, Horizontal is 1.
    random_func: partial function for calling either stateful or stateless
      random ops with `seed` parameter specified.
    scope_name: Name of the scope in which the ops are added.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(None, scope_name, [image]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      uniform_random = random_func(shape=[], minval=0, maxval=1.0)
      mirror_cond = math_ops.less(uniform_random, .5)
      result = tf_cond.cond(
          mirror_cond,
          lambda: array_ops.reverse(image, [flip_index]),
          lambda: image,
          name=scope)
      return fix_image_flip_shape(image, result)

    def f_rank4():
      batch_size = array_ops.shape(image)[0]
      uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0)
      flips = math_ops.round(
          array_ops.reshape(uniform_random, [batch_size, 1, 1, 1]))
      flips = math_ops.cast(flips, image.dtype)
      flipped_input = array_ops.reverse(image, [flip_index + 1])
      return flips * flipped_input + (1 - flips) * image

    if shape.ndims is None:
      rank = array_ops.rank(image)
      return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    if shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." % shape)
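

# Illustrative sketch (not part of the original module): the rank-4 branch
# avoids a per-image cond by blending instead. `flips` is a [batch, 1, 1, 1]
# tensor of 0s and 1s, so each image independently selects either its flipped
# or its original version:
#
#   flips = math_ops.round(
#       array_ops.reshape(uniform_random, [batch_size, 1, 1, 1]))
#   result = flips * flipped_input + (1 - flips) * image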


@tf_export('image.flip_left_right')
@dispatch.add_dispatch_support
def flip_left_right(image):
  """Flip an image horizontally (left to right).

  Outputs the contents of `image` flipped along the width dimension.

  See also `tf.reverse`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_left_right(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 4.,  5.,  6.],
          [ 1.,  2.,  3.]],
         [[10., 11., 12.],
          [ 7.,  8.,  9.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  return _flip(image, 1, 'flip_left_right')


@tf_export('image.flip_up_down')
@dispatch.add_dispatch_support
def flip_up_down(image):
  """Flip an image vertically (upside down).

  Outputs the contents of `image` flipped along the height dimension.

  See also `reverse()`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_up_down(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 7.,  8.,  9.],
          [10., 11., 12.]],
         [[ 1.,  2.,  3.],
          [ 4.,  5.,  6.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  return _flip(image, 0, 'flip_up_down')


def _flip(image, flip_index, scope_name):
  """Flip an image either horizontally or vertically.

  Outputs the contents of `image` flipped along the dimension `flip_index`.

  See also `reverse()`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: 0 for vertical, 1 for horizontal.
    scope_name: string, scope name.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(None, scope_name, [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index]))

    def f_rank4():
      return array_ops.reverse(image, [flip_index + 1])

    if shape.ndims is None:
      rank = array_ops.rank(image)
      return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." % shape)


@tf_export('image.rot90')
@dispatch.add_dispatch_support
def rot90(image, k=1, name=None):
  """Rotate image(s) counter-clockwise by 90 degrees.

  For example:

  >>> a = tf.constant([[[1],[2]],
  ...                  [[3],[4]]])
  >>> # rotating `a` counter clockwise by 90 degrees
  >>> a_rot = tf.image.rot90(a)
  >>> print(a_rot[...,0].numpy())
  [[2 4]
   [1 3]]
  >>> # rotating `a` counter clockwise by 270 degrees
  >>> a_rot = tf.image.rot90(a, k=3)
  >>> print(a_rot[...,0].numpy())
  [[3 1]
   [4 2]]
  >>> # rotating `a` clockwise by 180 degrees
  >>> a_rot = tf.image.rot90(a, k=-2)
  >>> print(a_rot[...,0].numpy())
  [[4 3]
   [2 1]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    k: A scalar integer tensor. The number of times the image(s) are rotated by
      90 degrees.
    name: A name for this operation (optional).

  Returns:
    A rotated tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    k = math_ops.mod(k, 4)

    shape = image.get_shape()
    if shape.ndims is None:
      rank = array_ops.rank(image)

      def f_rank3():
        return _rot90_3D(image, k, scope)

      def f_rank4():
        return _rot90_4D(image, k, scope)

      return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return _rot90_3D(image, k, scope)
    elif shape.ndims == 4:
      return _rot90_4D(image, k, scope)
    else:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." % shape)


def _rot90_3D(image, k, name_scope):
  """Rotate image counter-clockwise by 90 degrees `k` times.

  Args:
    image: 3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 3-D tensor of the same type and shape as `image`.
  """

  def _rot90():
    return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2])

  def _rot180():
    return array_ops.reverse_v2(image, [0, 1])

  def _rot270():
    return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1])

  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
           (math_ops.equal(k, 3), _rot270)]

  result = control_flow_case.case(
      cases, default=lambda: image, exclusive=True, name=name_scope)
  result.set_shape([None, None, image.get_shape()[2]])
  return result
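

# Illustrative sketch (not part of the original module): each 90-degree
# counter-clockwise rotation decomposes into a width reversal plus a swap of
# the spatial axes. In NumPy terms, for a hypothetical array `a` of shape
# [height, width, channels]:
#
#   np.transpose(a[:, ::-1, :], [1, 0, 2])  # equivalent to _rot90() above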


def _rot90_4D(images, k, name_scope):
  """Rotate batch of images counter-clockwise by 90 degrees `k` times.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`.
    k: A scalar integer. The number of times the images are rotated by 90
      degrees.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 4-D `Tensor` of the same type and shape as `images`.
  """

  def _rot90():
    return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3])

  def _rot180():
    return array_ops.reverse_v2(images, [1, 2])

  def _rot270():
    return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2])

  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
           (math_ops.equal(k, 3), _rot270)]

  result = control_flow_case.case(
      cases, default=lambda: images, exclusive=True, name=name_scope)
  shape = result.get_shape()
  result.set_shape([shape[0], None, None, shape[3]])
  return result


@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image'])
@dispatch.add_dispatch_support
def transpose(image, name=None):
  """Transpose image(s) by swapping the height and width dimension.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.transpose(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.,  2.,  3.],
          [ 7.,  8.,  9.]],
         [[ 4.,  5.,  6.],
          [10., 11., 12.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    name: A name for this operation (optional).

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, width, height, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[width, height, channels]`.

  Raises:
    ValueError: if the shape of `image` is not supported.

  Usage Example:

  >>> image = [[[1, 2], [3, 4]],
  ...          [[5, 6], [7, 8]],
  ...          [[9, 10], [11, 12]]]
  >>> image = tf.constant(image)
  >>> tf.image.transpose(image)
  <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy=
  array([[[ 1,  2],
          [ 5,  6],
          [ 9, 10]],
         [[ 3,  4],
          [ 7,  8],
          [11, 12]]], dtype=int32)>
  """
  with ops.name_scope(name, 'transpose', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()
    if shape.ndims is None:
      rank = array_ops.rank(image)

      def f_rank3():
        return array_ops.transpose(image, [1, 0, 2], name=name)

      def f_rank4():
        return array_ops.transpose(image, [0, 2, 1, 3], name=name)

      return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return array_ops.transpose(image, [1, 0, 2], name=name)
    elif shape.ndims == 4:
      return array_ops.transpose(image, [0, 2, 1, 3], name=name)
    else:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." % shape)


@tf_export('image.central_crop')
@dispatch.add_dispatch_support
def central_crop(image, central_fraction):
  """Crop the central region of the image(s).

  Remove the outer parts of an image but retain the central region of the image
  along each dimension. If we specify `central_fraction = 0.5`, this function
  returns the region marked with "X" in the below diagram. The larger the value
  of `central_fraction`, the larger the dimension of the region to be cropped
  and retained.

       --------
      |        |
      |  XXXX  |
      |  XXXX  |
      |        |   where "X" is the central 50% of the image.
       --------

  This function works on either a single image (`image` is a 3-D Tensor), or a
  batch of images (`image` is a 4-D Tensor).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0],
  ...       [7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]],
  ...      [[13.0, 14.0, 15.0],
  ...       [16.0, 17.0, 18.0],
  ...       [19.0, 20.0, 21.0],
  ...       [22.0, 23.0, 24.0]],
  ...      [[25.0, 26.0, 27.0],
  ...       [28.0, 29.0, 30.0],
  ...       [31.0, 32.0, 33.0],
  ...       [34.0, 35.0, 36.0]],
  ...      [[37.0, 38.0, 39.0],
  ...       [40.0, 41.0, 42.0],
  ...       [43.0, 44.0, 45.0],
  ...       [46.0, 47.0, 48.0]]]
  >>> tf.image.central_crop(x, 0.5)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[16., 17., 18.],
          [19., 20., 21.]],
         [[28., 29., 30.],
          [31., 32., 33.]]], dtype=float32)>

  Args:
    image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D
      Tensor of shape [batch_size, height, width, depth].
    central_fraction: float (0, 1], fraction of size to crop

  Raises:
    ValueError: if `central_fraction` is not within (0, 1].

  Returns:
    3-D / 4-D float Tensor, as per the input.
  """
  with ops.name_scope(None, 'central_crop', [image]):
    image = ops.convert_to_tensor(image, name='image')
    central_fraction_static = tensor_util.constant_value(central_fraction)
    if central_fraction_static is not None:
      if central_fraction_static <= 0.0 or central_fraction_static > 1.0:
        raise ValueError('central_fraction must be within (0, 1]')
      if central_fraction_static == 1.0:
        return image
    else:
      assert_ops = _assert(
          math_ops.logical_and(central_fraction > 0.0,
                               central_fraction <= 1.0),
          ValueError, 'central_fraction must be within (0, 1]')
      image = control_flow_ops.with_dependencies(assert_ops, image)

    _AssertAtLeast3DImage(image)
    rank = image.get_shape().ndims
    if rank != 3 and rank != 4:
      raise ValueError('`image` should either be a Tensor with rank = 3 or '
                       'rank = 4. Had rank = {}.'.format(rank))

    # Helper method to return the `idx`-th dimension of `tensor`, along with
    # a boolean signifying if the dimension is dynamic.
    def _get_dim(tensor, idx):
      static_shape = tensor.get_shape().dims[idx].value
      if static_shape is not None:
        return static_shape, False
      return array_ops.shape(tensor)[idx], True

    # Get the height, width, depth (and batch size, if the image is a 4-D
    # tensor).
    if rank == 3:
      img_h, dynamic_h = _get_dim(image, 0)
      img_w, dynamic_w = _get_dim(image, 1)
      img_d = image.get_shape()[2]
    else:
      img_bs = image.get_shape()[0]
      img_h, dynamic_h = _get_dim(image, 1)
      img_w, dynamic_w = _get_dim(image, 2)
      img_d = image.get_shape()[3]

    dynamic_h = dynamic_h or (central_fraction_static is None)
    dynamic_w = dynamic_w or (central_fraction_static is None)

    # Compute the bounding boxes for the crop. The type and value of the
    # bounding boxes depend on the `image` tensor's rank and whether or not the
    # dimensions are statically defined.
    if dynamic_h:
      img_hd = math_ops.cast(img_h, dtypes.float64)
      bbox_h_start = math_ops.cast(
          (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) /
          2, dtypes.int32)
    else:
      img_hd = float(img_h)
      bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2)

    if dynamic_w:
      img_wd = math_ops.cast(img_w, dtypes.float64)
      bbox_w_start = math_ops.cast(
          (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) /
          2, dtypes.int32)
    else:
      img_wd = float(img_w)
      bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2)

    bbox_h_size = img_h - bbox_h_start * 2
    bbox_w_size = img_w - bbox_w_start * 2

    if rank == 3:
      bbox_begin = array_ops_stack.stack([bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops_stack.stack([bbox_h_size, bbox_w_size, -1])
    else:
      bbox_begin = array_ops_stack.stack([0, bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops_stack.stack([-1, bbox_h_size, bbox_w_size, -1])

    image = array_ops.slice(image, bbox_begin, bbox_size)

    # Reshape the `image` tensor to the desired size.
    if rank == 3:
      image.set_shape([
          None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size, img_d
      ])
    else:
      image.set_shape([
          img_bs, None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size, img_d
      ])
    return image
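

# Illustrative sketch (hypothetical numbers, not part of the original module):
# for a 100-pixel-tall image and central_fraction = 0.5, the static branch
# keeps the middle half of the rows:
#
#   bbox_h_start = int((100 - 100 * 0.5) / 2)  # 25
#   bbox_h_size = 100 - bbox_h_start * 2       # 50 rows retained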


@tf_export('image.pad_to_bounding_box')
@dispatch.add_dispatch_support
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width):
  """Pad `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Usage Example:

  >>> x = [[[1., 2., 3.],
  ...       [4., 5., 6.]],
  ...      [[7., 8., 9.],
  ...       [10., 11., 12.]]]
  >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4)
  >>> padded_image
  <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy=
  array([[[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 1.,  2.,  3.],
          [ 4.,  5.,  6.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 7.,  8.,  9.],
          [10., 11., 12.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`.

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative.
  """
  return pad_to_bounding_box_internal(
      image,
      offset_height,
      offset_width,
      target_height,
      target_width,
      check_dims=True)


# TODO(b/190099338) Remove this internal method and remap call sites to call
# image_ops.pad_to_bounding_box when asserts are no longer serialized. See also
# b/204377079#comment6 for more context.
def pad_to_bounding_box_internal(image, offset_height, offset_width,
                                 target_height, target_width, check_dims):
  """Pad `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.
    check_dims: If `True`, assert that dimensions are non-negative and in range.
      In multi-GPU distributed settings, assertions can cause program slowdown.
      Setting this parameter to `False` avoids this, resulting in faster speed
      in some situations, with the tradeoff being that some error checking is
      not happening.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`.

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative. Not raised if `check_dims` is `False`.
  """
  with ops.name_scope(None, 'pad_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    is_batch = True
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." %
          image_shape)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    after_padding_width = target_width - offset_width - width

    after_padding_height = target_height - offset_height - height

    if check_dims:
      assert_ops = _CheckAtLeast3DImage(image, require_static=False)
      assert_ops += _assert(offset_height >= 0, ValueError,
                            'offset_height must be >= 0')
      assert_ops += _assert(offset_width >= 0, ValueError,
                            'offset_width must be >= 0')
      assert_ops += _assert(after_padding_width >= 0, ValueError,
                            'width must be <= target - offset')
      assert_ops += _assert(after_padding_height >= 0, ValueError,
                            'height must be <= target - offset')
      image = control_flow_ops.with_dependencies(assert_ops, image)

    # Do not pad on the depth dimensions.
    paddings = array_ops.reshape(
        array_ops_stack.stack([
            0, 0, offset_height, after_padding_height, offset_width,
            after_padding_width, 0, 0
        ]), [4, 2])
    padded = array_ops.pad(image, paddings)

    padded_shape = [
        None if _is_tensor(i) else i
        for i in [batch, target_height, target_width, depth]
    ]
    padded.set_shape(padded_shape)

    if not is_batch:
      padded = array_ops.squeeze(padded, axis=[0])

    return padded
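

# Illustrative sketch (not part of the original module): the [4, 2] `paddings`
# tensor built above holds (before, after) padding amounts per axis of the
# rank-4 image, with no padding on batch or depth:
#
#   paddings = [[0, 0],                                  # batch
#               [offset_height, after_padding_height],   # height
#               [offset_width, after_padding_width],     # width
#               [0, 0]]                                  # depth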


@tf_export('image.crop_to_bounding_box')
@dispatch.add_dispatch_support
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an `image` to a specified bounding box.

  This op cuts a rectangular bounding box out of `image`. The top-left corner
  of the bounding box is at `offset_height, offset_width` in `image`, and the
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Example Usage:

  >>> image = tf.constant(np.arange(1, 28, dtype=np.float32), shape=[3, 3, 3])
  >>> image[:,:,0]  # print the first channel of the 3-D tensor
  <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
  array([[ 1.,  4.,  7.],
         [10., 13., 16.],
         [19., 22., 25.]], dtype=float32)>
  >>> cropped_image = tf.image.crop_to_bounding_box(image, 0, 0, 2, 2)
  >>> cropped_image[:,:,0]  # print the first channel of the cropped 3-D tensor
  <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
  array([[ 1.,  4.],
         [10., 13.]], dtype=float32)>

  Args:
    image: 4-D `Tensor` of shape `[batch, height, width, channels]` or 3-D
      `Tensor` of shape `[height, width, channels]`.
    offset_height: Vertical coordinate of the top-left corner of the bounding
      box in `image`.
    offset_width: Horizontal coordinate of the top-left corner of the bounding
      box in `image`.
    target_height: Height of the bounding box.
    target_width: Width of the bounding box.

  Returns:
    If `image` was 4-D, a 4-D `Tensor` of shape
    `[batch, target_height, target_width, channels]`.
    If `image` was 3-D, a 3-D `Tensor` of shape
    `[target_height, target_width, channels]`.
    It has the same dtype as `image`.

  Raises:
    ValueError: `image` is not a 3-D or 4-D `Tensor`.
    ValueError: `offset_width < 0` or `offset_height < 0`.
    ValueError: `target_width <= 0` or `target_height <= 0`.
    ValueError: `width < offset_width + target_width` or
      `height < offset_height + target_height`.
  """
  with ops.name_scope(None, 'crop_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    is_batch = True
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    assert_ops += _assert(offset_width >= 0, ValueError,
                          'offset_width must be >= 0.')
    assert_ops += _assert(offset_height >= 0, ValueError,
                          'offset_height must be >= 0.')
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')
    assert_ops += _assert(width >= (target_width + offset_width), ValueError,
                          'width must be >= target + offset.')
    assert_ops += _assert(height >= (target_height + offset_height),
                          ValueError, 'height must be >= target + offset.')
    image = control_flow_ops.with_dependencies(assert_ops, image)

    cropped = array_ops.slice(
        image,
        array_ops_stack.stack([0, offset_height, offset_width, 0]),
        array_ops_stack.stack([
            array_ops.shape(image)[0],
            target_height,
            target_width,
            array_ops.shape(image)[3]
        ]))

    cropped_shape = [
        None if _is_tensor(i) else i
        for i in [batch, target_height, target_width, depth]
    ]
    cropped.set_shape(cropped_shape)

    if not is_batch:
      cropped = array_ops.squeeze(cropped, axis=[0])

    return cropped
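

# Illustrative sketch (not part of the original module): the crop above is a
# single `slice` whose begin/size vectors leave the batch and depth axes
# untouched:
#
#   begin = [0, offset_height, offset_width, 0]
#   size = [batch, target_height, target_width, depth]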


@tf_export(
    'image.resize_with_crop_or_pad',
    v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad'])
@dispatch.add_dispatch_support
def resize_image_with_crop_or_pad(image, target_height, target_width):
  """Crops and/or pads an image to a target width and height.

  Resizes an image to a target width and height by either centrally
  cropping the image or padding it evenly with zeros.

  If `width` or `height` is greater than the specified `target_width` or
  `target_height` respectively, this op centrally crops along that dimension.

  For example:

  >>> image = np.arange(75).reshape(5, 5, 3)  # create 3-D image input
  >>> image[:,:,0]  # print first channel just for demo purposes
  array([[ 0,  3,  6,  9, 12],
         [15, 18, 21, 24, 27],
         [30, 33, 36, 39, 42],
         [45, 48, 51, 54, 57],
         [60, 63, 66, 69, 72]])
  >>> image = tf.image.resize_with_crop_or_pad(image, 3, 3)  # crop
  >>> # print first channel for demo purposes; centrally cropped output
  >>> image[:,:,0]
  <tf.Tensor: shape=(3, 3), dtype=int64, numpy=
  array([[18, 21, 24],
         [33, 36, 39],
         [48, 51, 54]])>

  If `width` or `height` is smaller than the specified `target_width` or
  `target_height` respectively, this op centrally pads with 0 along that
  dimension.

  For example:

  >>> image = np.arange(1, 28).reshape(3, 3, 3)  # create 3-D image input
  >>> image[:,:,0]  # print first channel just for demo purposes
  array([[ 1,  4,  7],
         [10, 13, 16],
         [19, 22, 25]])
  >>> image = tf.image.resize_with_crop_or_pad(image, 5, 5)  # pad
  >>> # print first channel for demo purposes; we should see 0 paddings
  >>> image[:,:,0]
  <tf.Tensor: shape=(5, 5), dtype=int64, numpy=
  array([[ 0,  0,  0,  0,  0],
         [ 0,  1,  4,  7,  0],
         [ 0, 10, 13, 16,  0],
         [ 0, 19, 22, 25,  0],
         [ 0,  0,  0,  0,  0]])>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Cropped and/or padded image.
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image_shape = image.get_shape()
    is_batch = True
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          "'image' (shape %s) must have either 3 or 4 dimensions." %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')

    image = control_flow_ops.with_dependencies(assert_ops, image)
    # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
    # Make sure our checks come first, so that error messages are clearer.
    if _is_tensor(target_height):
      target_height = control_flow_ops.with_dependencies(
          assert_ops, target_height)
    if _is_tensor(target_width):
      target_width = control_flow_ops.with_dependencies(assert_ops,
                                                        target_width)

    def max_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.maximum(x, y)
      else:
        return max(x, y)

    def min_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.minimum(x, y)
      else:
        return min(x, y)

    def equal_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.equal(x, y)
      else:
        return x == y

    _, height, width, _ = _ImageDimensions(image, rank=4)
    width_diff = target_width - width
    offset_crop_width = max_(-width_diff // 2, 0)
    offset_pad_width = max_(width_diff // 2, 0)

    height_diff = target_height - height
    offset_crop_height = max_(-height_diff // 2, 0)
    offset_pad_height = max_(height_diff // 2, 0)

    # Maybe crop if needed.
    cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
                                   min_(target_height, height),
                                   min_(target_width, width))

    # Maybe pad if needed.
    resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
                                  target_height, target_width)

    # In theory all the checks below are redundant.
    if resized.get_shape().ndims is None:
      raise ValueError('resized contains no shape.')

    _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)

    assert_ops = []
    assert_ops += _assert(
        equal_(resized_height, target_height), ValueError,
        'resized height is not correct.')
    assert_ops += _assert(
        equal_(resized_width, target_width), ValueError,
        'resized width is not correct.')

    resized = control_flow_ops.with_dependencies(assert_ops, resized)

    if not is_batch:
      resized = array_ops.squeeze(resized, axis=[0])

    return resized
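

# Illustrative sketch (hypothetical numbers, not part of the original module):
# the signed difference between the target and current sizes drives both
# cases. Resizing a width-5 image to width 3 crops; to width 9 it pads:
#
#   width_diff = 3 - 5                       # -2: crop case
#   offset_crop_width = max(-(-2) // 2, 0)   # 1 column trimmed per side
#   width_diff = 9 - 5                       # +4: pad case
#   offset_pad_width = max(4 // 2, 0)        # 2 zero columns on the left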


@tf_export(v1=['image.ResizeMethod'])
class ResizeMethodV1:
  """See `v1.image.resize` for details."""
  BILINEAR = 0
  NEAREST_NEIGHBOR = 1
  BICUBIC = 2
  AREA = 3


@tf_export('image.ResizeMethod', v1=[])
class ResizeMethod:
  """See `tf.image.resize` for details."""
  BILINEAR = 'bilinear'
  NEAREST_NEIGHBOR = 'nearest'
  BICUBIC = 'bicubic'
  AREA = 'area'
  LANCZOS3 = 'lanczos3'
  LANCZOS5 = 'lanczos5'
  GAUSSIAN = 'gaussian'
  MITCHELLCUBIC = 'mitchellcubic'


def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name,
                          skip_resize_if_same):
  """Core functionality for v1 and v2 resize functions."""
  with ops.name_scope(name, 'resize', [images, size]):
    images = ops.convert_to_tensor(images, name='images')
    if images.get_shape().ndims is None:
      raise ValueError("'images' contains no shape.")
    # TODO(shlens): Migrate this functionality to the underlying Op's.
    is_batch = True
    if images.get_shape().ndims == 3:
      is_batch = False
      images = array_ops.expand_dims(images, 0)
    elif images.get_shape().ndims != 4:
      raise ValueError("'images' must have either 3 or 4 dimensions.")

    _, height, width, _ = images.get_shape().as_list()

    try:
      size = ops.convert_to_tensor(size, dtypes.int32, name='size')
    except (TypeError, ValueError):
      raise ValueError("'size' must be a 1-D int32 Tensor")
    if not size.get_shape().is_compatible_with([2]):
      raise ValueError("'size' must be a 1-D Tensor of 2 elements: "
                       'new_height, new_width')

    if preserve_aspect_ratio:
      # Get the current shapes of the image, even if dynamic.
      _, current_height, current_width, _ = _ImageDimensions(images, rank=4)

      # do the computation to find the right scale and height/width.
      scale_factor_height = (
          math_ops.cast(size[0], dtypes.float32) /
          math_ops.cast(current_height, dtypes.float32))
      scale_factor_width = (
          math_ops.cast(size[1], dtypes.float32) /
          math_ops.cast(current_width, dtypes.float32))
      scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width)
      scaled_height_const = math_ops.cast(
          math_ops.round(scale_factor *
                         math_ops.cast(current_height, dtypes.float32)),
          dtypes.int32)
      scaled_width_const = math_ops.cast(
          math_ops.round(scale_factor *
                         math_ops.cast(current_width, dtypes.float32)),
          dtypes.int32)

      # NOTE: Reset the size and other constants used later.
      size = ops.convert_to_tensor([scaled_height_const, scaled_width_const],
                                   dtypes.int32,
                                   name='size')

    size_const_as_shape = tensor_util.constant_value_as_shape(size)
    new_height_const = tensor_shape.dimension_at_index(size_const_as_shape,
                                                       0).value
    new_width_const = tensor_shape.dimension_at_index(size_const_as_shape,
                                                      1).value

    # If we can determine that the height and width will be unmodified by this
    # transformation, we avoid performing the resize.
    if skip_resize_if_same and all(
        x is not None
        for x in [new_width_const, width, new_height_const, height]) and (
            width == new_width_const and height == new_height_const):
      if not is_batch:
        images = array_ops.squeeze(images, axis=[0])
      return images

    images = resizer_fn(images, size)

    # NOTE(mrry): The shape functions for the resize ops cannot unpack
    # the packed values in `new_size`, so set the shape here.
    images.set_shape([None, new_height_const, new_width_const, None])

    if not is_batch:
      images = array_ops.squeeze(images, axis=[0])
    return images
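

# Illustrative sketch (hypothetical numbers, not part of the original module):
# with preserve_aspect_ratio=True, the smaller of the two scale factors wins,
# so the result always fits inside `size`. E.g. a 5x5 image and size [10, 20]:
#
#   scale_factor = min(10 / 5, 20 / 5)              # 2.0
#   scaled_size = (round(2.0 * 5), round(2.0 * 5))  # (10, 10)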


@tf_export(v1=['image.resize_images', 'image.resize'])
@dispatch.add_dispatch_support
def resize_images(images,
                  size,
                  method=ResizeMethodV1.BILINEAR,
                  align_corners=False,
                  preserve_aspect_ratio=False,
                  name=None):
  """Resize `images` to `size` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `size`. To avoid distortions see
  `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`.

  The `method` can be one of:

  *   <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.](
    https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [
    Nearest neighbor interpolation.](
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.](
    https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation.

  The return value has the same type as `images` if `method` is
  `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type
  as `images` if the size of `images` can be statically determined to be the
  same as `size`, because `images` is returned in this case. Otherwise, the
  return value has type `float32`.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D
      Tensor of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new
      size for the images.
    method: ResizeMethod. Defaults to `tf.image.ResizeMethod.BILINEAR`.
    align_corners: bool. If True, the centers of the 4 corner pixels of the
      input and output tensors are aligned, preserving the values at the corner
      pixels. Defaults to `False`.
    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
      then `images` will be resized to a size that fits in `size` while
      preserving the aspect ratio of the original image. Scales up the image if
      `size` is bigger than the current size of the `image`. Defaults to False.
    name: A name for this operation (optional).

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function.
    ValueError: if `size` has an invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def resize_fn(images_t, new_size):
    """Legacy resize core function, passed to _resize_images_common."""
    if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR:
      return gen_image_ops.resize_bilinear(
          images_t, new_size, align_corners=align_corners)
    elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or
          method == ResizeMethod.NEAREST_NEIGHBOR):
      return gen_image_ops.resize_nearest_neighbor(
          images_t, new_size, align_corners=align_corners)
    elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC:
      return gen_image_ops.resize_bicubic(
          images_t, new_size, align_corners=align_corners)
    elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA:
      return gen_image_ops.resize_area(
          images_t, new_size, align_corners=align_corners)
    else:
      raise ValueError('Resize method is not implemented: {}'.format(method))

  return _resize_images_common(
      images,
      resize_fn,
      size,
      preserve_aspect_ratio=preserve_aspect_ratio,
      name=name,
      skip_resize_if_same=True)
1610@tf_export('image.resize', v1=[])
1611@dispatch.add_dispatch_support
1612def resize_images_v2(images,
1613 size,
1614 method=ResizeMethod.BILINEAR,
1615 preserve_aspect_ratio=False,
1616 antialias=False,
1617 name=None):
1618 """Resize `images` to `size` using the specified `method`.
1620 Resized images will be distorted if their original aspect ratio is not
1621 the same as `size`. To avoid distortions see
1622 `tf.image.resize_with_pad`.
1624 >>> image = tf.constant([
1625 ... [1,0,0,0,0],
1626 ... [0,1,0,0,0],
1627 ... [0,0,1,0,0],
1628 ... [0,0,0,1,0],
1629 ... [0,0,0,0,1],
1630 ... ])
1631 >>> # Add "batch" and "channels" dimensions
1632 >>> image = image[tf.newaxis, ..., tf.newaxis]
1633 >>> image.shape.as_list() # [batch, height, width, channels]
1634 [1, 5, 5, 1]
1635 >>> tf.image.resize(image, [3,5])[0,...,0].numpy()
1636 array([[0.6666667, 0.3333333, 0. , 0. , 0. ],
1637 [0. , 0. , 1. , 0. , 0. ],
1638 [0. , 0. , 0. , 0.3333335, 0.6666665]],
1639 dtype=float32)
1641 It works equally well with a single image instead of a batch of images:
1643 >>> tf.image.resize(image[0], [3,5]).shape.as_list()
1644 [3, 5, 1]
1646 When `antialias` is true, the sampling filter will anti-alias the input image
1647 as well as interpolate. When downsampling an image with [anti-aliasing](
1648 https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter
1649 kernel is scaled in order to properly anti-alias the input image signal.
1650 `antialias` has no effect when upsampling an image:
1652 >>> a = tf.image.resize(image, [5,10])
1653 >>> b = tf.image.resize(image, [5,10], antialias=True)
1654 >>> tf.reduce_max(abs(a - b)).numpy()
1655 0.0
1657 The `method` argument expects an item from the `image.ResizeMethod` enum, or
1658 the string equivalent. The options are:
1660 * <b>`bilinear`</b>: [Bilinear interpolation.](
1661 https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is
1662 true, becomes a hat/tent filter function with radius 1 when downsampling.
1663 * <b>`lanczos3`</b>: [Lanczos kernel](
1664 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3.
1665 High-quality practical filter but may have some ringing, especially on
1666 synthetic images.
1667 * <b>`lanczos5`</b>: [Lanczos kernel](
1668 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5.
1669 Very-high-quality filter but may have stronger ringing.
1670 * <b>`bicubic`</b>: [Cubic interpolant](
1671 https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to
1672 Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel,
1673 particularly when upsampling.
1674 * <b>`gaussian`</b>: [Gaussian kernel](
1675 https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3,
1676 sigma = 1.5 / 3.0.
1677 * <b>`nearest`</b>: [Nearest neighbor interpolation.](
1678 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
1679 `antialias` has no effect when used with nearest neighbor interpolation.
1680 * <b>`area`</b>: Anti-aliased resampling with area interpolation.
1681 `antialias` has no effect when used with area interpolation; it
1682 always anti-aliases.
1683 * <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter.
1684 For synthetic images (especially those lacking proper prefiltering), less
1685 ringing than Keys cubic kernel but less sharp.
1687 Note: Near image edges the filtering kernel may be partially outside the
1688 image boundaries. For these pixels, only input pixels inside the image will be
1689 included in the filter sum, and the output value will be appropriately
1690 normalized.
1692 The return value has type `float32`, unless the `method` is
1693 `ResizeMethod.NEAREST_NEIGHBOR`, in which case the return dtype is the dtype
1694 of `images`:
1696 >>> nn = tf.image.resize(image, [5,7], method='nearest')
1697 >>> nn[0,...,0].numpy()
1698 array([[1, 0, 0, 0, 0, 0, 0],
1699 [0, 1, 1, 0, 0, 0, 0],
1700 [0, 0, 0, 1, 0, 0, 0],
1701 [0, 0, 0, 0, 1, 1, 0],
1702 [0, 0, 0, 0, 0, 0, 1]], dtype=int32)
1704 With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size`
1705 is the maximum for each dimension:
1707 >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True)
1708 >>> max_10_20.shape.as_list()
1709 [1, 10, 10, 1]
1711 Args:
1712 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1713 of shape `[height, width, channels]`.
1714 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new
1715 size for the images.
1716 method: An `image.ResizeMethod`, or string equivalent. Defaults to
1717 `bilinear`.
1718 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
1719 then `images` will be resized to a size that fits in `size` while
1720 preserving the aspect ratio of the original image. Scales up the image if
1721 `size` is bigger than the current size of the `image`. Defaults to False.
1722 antialias: Whether to use an anti-aliasing filter when downsampling an
1723 image.
1724 name: A name for this operation (optional).
1726 Raises:
1727 ValueError: if the shape of `images` is incompatible with the
1728 shape arguments to this function.
1729 ValueError: if `size` has an invalid shape or type.
1730 ValueError: if an unsupported resize method is specified.
1732 Returns:
1733 If `images` was 4-D, a 4-D float Tensor of shape
1734 `[batch, new_height, new_width, channels]`.
1735 If `images` was 3-D, a 3-D float Tensor of shape
1736 `[new_height, new_width, channels]`.
1737 """
1739 def resize_fn(images_t, new_size):
1740 """Resize core function, passed to _resize_images_common."""
1741 scale_and_translate_methods = [
1742 ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN,
1743 ResizeMethod.MITCHELLCUBIC
1744 ]
1746 def resize_with_scale_and_translate(method):
1747 scale = (
1748 math_ops.cast(new_size, dtype=dtypes.float32) /
1749 math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32))
1750 return gen_image_ops.scale_and_translate(
1751 images_t,
1752 new_size,
1753 scale,
1754 array_ops.zeros([2]),
1755 kernel_type=method,
1756 antialias=antialias)
1758 if method == ResizeMethod.BILINEAR:
1759 if antialias:
1760 return resize_with_scale_and_translate('triangle')
1761 else:
1762 return gen_image_ops.resize_bilinear(
1763 images_t, new_size, half_pixel_centers=True)
1764 elif method == ResizeMethod.NEAREST_NEIGHBOR:
1765 return gen_image_ops.resize_nearest_neighbor(
1766 images_t, new_size, half_pixel_centers=True)
1767 elif method == ResizeMethod.BICUBIC:
1768 if antialias:
1769 return resize_with_scale_and_translate('keyscubic')
1770 else:
1771 return gen_image_ops.resize_bicubic(
1772 images_t, new_size, half_pixel_centers=True)
1773 elif method == ResizeMethod.AREA:
1774 return gen_image_ops.resize_area(images_t, new_size)
1775 elif method in scale_and_translate_methods:
1776 return resize_with_scale_and_translate(method)
1777 else:
1778 raise ValueError('Resize method is not implemented: {}'.format(method))
1780 return _resize_images_common(
1781 images,
1782 resize_fn,
1783 size,
1784 preserve_aspect_ratio=preserve_aspect_ratio,
1785 name=name,
1786 skip_resize_if_same=False)
1789def _resize_image_with_pad_common(image, target_height, target_width,
1790 resize_fn):
1791 """Core functionality for v1 and v2 resize_image_with_pad functions."""
1792 with ops.name_scope(None, 'resize_image_with_pad', [image]):
1793 image = ops.convert_to_tensor(image, name='image')
1794 image_shape = image.get_shape()
1795 is_batch = True
1796 if image_shape.ndims == 3:
1797 is_batch = False
1798 image = array_ops.expand_dims(image, 0)
1799 elif image_shape.ndims is None:
1800 is_batch = False
1801 image = array_ops.expand_dims(image, 0)
1802 image.set_shape([None] * 4)
1803 elif image_shape.ndims != 4:
1804 raise ValueError(
1805 '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
1806 image_shape)
1808 assert_ops = _CheckAtLeast3DImage(image, require_static=False)
1809 assert_ops += _assert(target_width > 0, ValueError,
1810 'target_width must be > 0.')
1811 assert_ops += _assert(target_height > 0, ValueError,
1812 'target_height must be > 0.')
1814 image = control_flow_ops.with_dependencies(assert_ops, image)
1816 def max_(x, y):
1817 if _is_tensor(x) or _is_tensor(y):
1818 return math_ops.maximum(x, y)
1819 else:
1820 return max(x, y)
1822 _, height, width, _ = _ImageDimensions(image, rank=4)
1824 # convert values to float, to ease divisions
1825 f_height = math_ops.cast(height, dtype=dtypes.float32)
1826 f_width = math_ops.cast(width, dtype=dtypes.float32)
1827 f_target_height = math_ops.cast(target_height, dtype=dtypes.float32)
1828 f_target_width = math_ops.cast(target_width, dtype=dtypes.float32)
1830 # Find the ratio by which the image must be adjusted
1831 # to fit within the target
1832 ratio = max_(f_width / f_target_width, f_height / f_target_height)
1833 resized_height_float = f_height / ratio
1834 resized_width_float = f_width / ratio
1835 resized_height = math_ops.cast(
1836 math_ops.floor(resized_height_float), dtype=dtypes.int32)
1837 resized_width = math_ops.cast(
1838 math_ops.floor(resized_width_float), dtype=dtypes.int32)
1840 padding_height = (f_target_height - resized_height_float) / 2
1841 padding_width = (f_target_width - resized_width_float) / 2
1842 f_padding_height = math_ops.floor(padding_height)
1843 f_padding_width = math_ops.floor(padding_width)
1844 p_height = max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32))
1845 p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32))
1847 # Resize first, then pad to meet requested dimensions
1848 resized = resize_fn(image, [resized_height, resized_width])
1850 padded = pad_to_bounding_box(resized, p_height, p_width, target_height,
1851 target_width)
1853 if padded.get_shape().ndims is None:
1854 raise ValueError('padded contains no shape.')
1856 _ImageDimensions(padded, rank=4)
1858 if not is_batch:
1859 padded = array_ops.squeeze(padded, axis=[0])
1861 return padded
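# Illustrative sketch (not part of the original module): a pure-Python
# restatement of the geometry computed above, so the resize-then-pad
# arithmetic is easy to check by hand. The helper name `_sketch_pad_geometry`
# is hypothetical and only handles static Python ints.
def _sketch_pad_geometry(height, width, target_height, target_width):
  # Shrink (or grow) by the larger overflow ratio so the result fits.
  ratio = max(width / target_width, height / target_height)
  resized_h = int(height / ratio)  # floor, as in the tensor code above
  resized_w = int(width / ratio)
  # Split the leftover space evenly, flooring, and clip at zero.
  pad_top = max(0, int((target_height - height / ratio) // 2))
  pad_left = max(0, int((target_width - width / ratio) // 2))
  return resized_h, resized_w, pad_top, pad_left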
1864@tf_export(v1=['image.resize_image_with_pad'])
1865@dispatch.add_dispatch_support
1866def resize_image_with_pad_v1(image,
1867 target_height,
1868 target_width,
1869 method=ResizeMethodV1.BILINEAR,
1870 align_corners=False):
1871 """Resizes and pads an image to a target width and height.
1873 Resizes an image to a target width and height by keeping
1874 the aspect ratio the same without distortion. If the target
1875 dimensions don't match the image dimensions, the image
1876 is resized and then padded with zeroes to match requested
1877 dimensions.
1879 Args:
1880 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1881 of shape `[height, width, channels]`.
1882 target_height: Target height.
1883 target_width: Target width.
1884 method: Method to use for resizing image. See `resize_images()`.
1885 align_corners: bool. If True, the centers of the 4 corner pixels of the
1886 input and output tensors are aligned, preserving the values at the corner
1887 pixels. Defaults to `False`.
1889 Raises:
1890 ValueError: if `target_height` or `target_width` are zero or negative.
1892 Returns:
1893 Resized and padded image.
1894 If `images` was 4-D, a 4-D float Tensor of shape
1895 `[batch, new_height, new_width, channels]`.
1896 If `images` was 3-D, a 3-D float Tensor of shape
1897 `[new_height, new_width, channels]`.
1898 """
1900 def _resize_fn(im, new_size):
1901 return resize_images(im, new_size, method, align_corners=align_corners)
1903 return _resize_image_with_pad_common(image, target_height, target_width,
1904 _resize_fn)
1907@tf_export('image.resize_with_pad', v1=[])
1908@dispatch.add_dispatch_support
1909def resize_image_with_pad_v2(image,
1910 target_height,
1911 target_width,
1912 method=ResizeMethod.BILINEAR,
1913 antialias=False):
1914 """Resizes and pads an image to a target width and height.
1916 Resizes an image to a target width and height by keeping
1917 the aspect ratio the same without distortion. If the target
1918 dimensions don't match the image dimensions, the image
1919 is resized and then padded with zeroes to match requested
1920 dimensions.
1922 Args:
1923 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1924 of shape `[height, width, channels]`.
1925 target_height: Target height.
1926 target_width: Target width.
1927 method: Method to use for resizing image. See `image.resize()`.
1928 antialias: Whether to use anti-aliasing when resizing. See `image.resize()`.
1930 Raises:
1931 ValueError: if `target_height` or `target_width` are zero or negative.
1933 Returns:
1934 Resized and padded image.
1935 If `images` was 4-D, a 4-D float Tensor of shape
1936 `[batch, new_height, new_width, channels]`.
1937 If `images` was 3-D, a 3-D float Tensor of shape
1938 `[new_height, new_width, channels]`.
1939 """
1941 def _resize_fn(im, new_size):
1942 return resize_images_v2(im, new_size, method, antialias=antialias)
1944 return _resize_image_with_pad_common(image, target_height, target_width,
1945 _resize_fn)
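# Illustrative sketch (not part of the original module): minimal use of the
# public `tf.image.resize_with_pad` endpoint defined above, assuming TF 2.x
# eager execution. The helper name is hypothetical.
def _example_resize_with_pad():
  import tensorflow as tf
  image = tf.ones([4, 8, 3])  # height=4, width=8, RGB
  # The 4x8 image already fits an 8x8 target (ratio 1.0), so it is kept at
  # 4x8 and letterboxed with two rows of zeros above and below.
  out = tf.image.resize_with_pad(image, 8, 8)
  return out.shape  # TensorShape([8, 8, 3])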
1948@tf_export('image.per_image_standardization')
1949@dispatch.add_dispatch_support
1950def per_image_standardization(image):
1951 """Linearly scales each image in `image` to have mean 0 and variance 1.
1953 For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`,
1954 where
1956 - `mean` is the average of all values in `x`
1957 - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to
1958 protect against division by 0 when handling uniform images
1959 - `N` is the number of elements in `x`
1960 - `stddev` is the standard deviation of all values in `x`
1962 Example Usage:
1964 >>> image = tf.constant(np.arange(1, 13, dtype=np.int32), shape=[2, 2, 3])
1965 >>> image # 3-D tensor
1966 <tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy=
1967 array([[[ 1, 2, 3],
1968 [ 4, 5, 6]],
1969 [[ 7, 8, 9],
1970 [10, 11, 12]]], dtype=int32)>
1971 >>> new_image = tf.image.per_image_standardization(image)
1972 >>> new_image # 3-D tensor with mean ~= 0 and variance ~= 1
1973 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
1974 array([[[-1.593255 , -1.3035723 , -1.0138896 ],
1975 [-0.7242068 , -0.4345241 , -0.14484136]],
1976 [[ 0.14484136, 0.4345241 , 0.7242068 ],
1977 [ 1.0138896 , 1.3035723 , 1.593255 ]]], dtype=float32)>
1979 Args:
1980 image: An n-D `Tensor` with at least 3 dimensions, the last 3 of which are
1981 the dimensions of each image.
1983 Returns:
1984 A `Tensor` with the same shape as `image` and its dtype is `float32`.
1986 Raises:
1987 ValueError: The shape of `image` has fewer than 3 dimensions.
1988 """
1989 with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
1990 image = ops.convert_to_tensor(image, name='image')
1991 image = _AssertAtLeast3DImage(image)
1993 image = math_ops.cast(image, dtype=dtypes.float32)
1994 num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:])
1995 image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)
1997 # Apply a minimum normalization that protects us against uniform images.
1998 stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True)
1999 min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
2000 adjusted_stddev = math_ops.maximum(stddev, min_stddev)
2002 image -= image_mean
2003 image = math_ops.divide(image, adjusted_stddev, name=scope)
2004 return image
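# Illustrative sketch (not part of the original module): a NumPy cross-check
# of the formula documented above, `(x - mean) / max(stddev, 1/sqrt(N))`.
# The helper name `_check_per_image_standardization` is hypothetical.
def _check_per_image_standardization():
  import numpy as np
  import tensorflow as tf
  x = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
  y = tf.image.per_image_standardization(x).numpy()
  adjusted_stddev = max(x.std(), 1.0 / np.sqrt(x.size))
  np.testing.assert_allclose(y, (x - x.mean()) / adjusted_stddev, rtol=1e-5)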
2007@tf_export('image.random_brightness')
2008@dispatch.register_unary_elementwise_api
2009@dispatch.add_dispatch_support
2010def random_brightness(image, max_delta, seed=None):
2011 """Adjust the brightness of images by a random factor.
2013 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
2014 interval `[-max_delta, max_delta)`.
2016 For producing deterministic results given a `seed` value, use
2017 `tf.image.stateless_random_brightness`. Unlike using the `seed` param
2018 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2019 same results given the same seed independent of how many times the function is
2020 called, and independent of global seed settings (e.g. tf.random.set_seed).
2022 Args:
2023 image: An image or images to adjust.
2024 max_delta: float, must be non-negative.
2025 seed: A Python integer. Used to create a random seed. See
2026 `tf.compat.v1.set_random_seed` for behavior.
2028 Usage Example:
2030 >>> x = [[[1.0, 2.0, 3.0],
2031 ... [4.0, 5.0, 6.0]],
2032 ... [[7.0, 8.0, 9.0],
2033 ... [10.0, 11.0, 12.0]]]
2034 >>> tf.image.random_brightness(x, 0.2)
2035 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2037 Returns:
2038 The brightness-adjusted image(s).
2040 Raises:
2041 ValueError: if `max_delta` is negative.
2042 """
2043 if max_delta < 0:
2044 raise ValueError('max_delta must be non-negative.')
2046 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
2047 return adjust_brightness(image, delta)
2050@tf_export('image.stateless_random_brightness', v1=[])
2051@dispatch.register_unary_elementwise_api
2052@dispatch.add_dispatch_support
2053def stateless_random_brightness(image, max_delta, seed):
2054 """Adjust the brightness of images by a random factor deterministically.
2056 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
2057 interval `[-max_delta, max_delta)`.
2059 Guarantees the same results given the same `seed` independent of how many
2060 times the function is called, and independent of global seed settings (e.g.
2061 `tf.random.set_seed`).
2063 Usage Example:
2065 >>> x = [[[1.0, 2.0, 3.0],
2066 ... [4.0, 5.0, 6.0]],
2067 ... [[7.0, 8.0, 9.0],
2068 ... [10.0, 11.0, 12.0]]]
2069 >>> seed = (1, 2)
2070 >>> tf.image.stateless_random_brightness(x, 0.2, seed)
2071 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2072 array([[[ 1.1376241, 2.1376243, 3.1376243],
2073 [ 4.1376243, 5.1376243, 6.1376243]],
2074 [[ 7.1376243, 8.137624 , 9.137624 ],
2075 [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)>
2077 Args:
2078 image: An image or images to adjust.
2079 max_delta: float, must be non-negative.
2080 seed: A shape [2] Tensor, the seed to the random number generator. Must have
2081 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2083 Returns:
2084 The brightness-adjusted image(s).
2086 Raises:
2087 ValueError: if `max_delta` is negative.
2088 """
2089 if max_delta < 0:
2090 raise ValueError('max_delta must be non-negative.')
2092 delta = stateless_random_ops.stateless_random_uniform(
2093 shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
2094 return adjust_brightness(image, delta)
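# Illustrative sketch (not part of the original module): the determinism
# guarantee described above, shown directly. Repeated calls with the same
# `seed` return identical tensors; the helper name is hypothetical.
def _check_stateless_brightness_determinism():
  import tensorflow as tf
  x = tf.ones([2, 2, 3])
  a = tf.image.stateless_random_brightness(x, 0.2, seed=(1, 2))
  b = tf.image.stateless_random_brightness(x, 0.2, seed=(1, 2))
  return bool(tf.reduce_all(a == b))  # True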
2097@tf_export('image.random_contrast')
2098@dispatch.add_dispatch_support
2099def random_contrast(image, lower, upper, seed=None):
2100 """Adjust the contrast of an image or images by a random factor.
2102 Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly
2103 picked in the interval `[lower, upper)`.
2105 For producing deterministic results given a `seed` value, use
2106 `tf.image.stateless_random_contrast`. Unlike using the `seed` param
2107 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2108 same results given the same seed independent of how many times the function is
2109 called, and independent of global seed settings (e.g. tf.random.set_seed).
2111 Args:
2112 image: An image tensor with 3 or more dimensions.
2113 lower: float. Lower bound for the random contrast factor.
2114 upper: float. Upper bound for the random contrast factor.
2115 seed: A Python integer. Used to create a random seed. See
2116 `tf.compat.v1.set_random_seed` for behavior.
2118 Usage Example:
2120 >>> x = [[[1.0, 2.0, 3.0],
2121 ... [4.0, 5.0, 6.0]],
2122 ... [[7.0, 8.0, 9.0],
2123 ... [10.0, 11.0, 12.0]]]
2124 >>> tf.image.random_contrast(x, 0.2, 0.5)
2125 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2127 Returns:
2128 The contrast-adjusted image(s).
2130 Raises:
2131 ValueError: if `upper <= lower` or if `lower < 0`.
2132 """
2133 if upper <= lower:
2134 raise ValueError('upper must be > lower.')
2136 if lower < 0:
2137 raise ValueError('lower must be non-negative.')
2139 contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed)
2140 return adjust_contrast(image, contrast_factor)
2143@tf_export('image.stateless_random_contrast', v1=[])
2144@dispatch.add_dispatch_support
2145def stateless_random_contrast(image, lower, upper, seed):
2146 """Adjust the contrast of images by a random factor deterministically.
2148 Guarantees the same results given the same `seed` independent of how many
2149 times the function is called, and independent of global seed settings (e.g.
2150 `tf.random.set_seed`).
2152 Args:
2153 image: An image tensor with 3 or more dimensions.
2154 lower: float. Lower bound for the random contrast factor.
2155 upper: float. Upper bound for the random contrast factor.
2156 seed: A shape [2] Tensor, the seed to the random number generator. Must have
2157 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2159 Usage Example:
2161 >>> x = [[[1.0, 2.0, 3.0],
2162 ... [4.0, 5.0, 6.0]],
2163 ... [[7.0, 8.0, 9.0],
2164 ... [10.0, 11.0, 12.0]]]
2165 >>> seed = (1, 2)
2166 >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed)
2167 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2168 array([[[3.4605184, 4.4605184, 5.4605184],
2169 [4.820173 , 5.820173 , 6.820173 ]],
2170 [[6.179827 , 7.179827 , 8.179828 ],
2171 [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)>
2173 Returns:
2174 The contrast-adjusted image(s).
2176 Raises:
2177 ValueError: if `upper <= lower` or if `lower < 0`.
2178 """
2179 if upper <= lower:
2180 raise ValueError('upper must be > lower.')
2182 if lower < 0:
2183 raise ValueError('lower must be non-negative.')
2185 contrast_factor = stateless_random_ops.stateless_random_uniform(
2186 shape=[], minval=lower, maxval=upper, seed=seed)
2187 return adjust_contrast(image, contrast_factor)
2190@tf_export('image.adjust_brightness')
2191@dispatch.register_unary_elementwise_api
2192@dispatch.add_dispatch_support
2193def adjust_brightness(image, delta):
2194 """Adjust the brightness of RGB or Grayscale images.
2196 This is a convenience method that converts RGB images to float
2197 representation, adjusts their brightness, and then converts them back to the
2198 original data type. If several adjustments are chained, it is advisable to
2199 minimize the number of redundant conversions.
2201 The value `delta` is added to all components of the tensor `image`. `image` is
2202 converted to `float` and scaled appropriately if it is in fixed-point
2203 representation, and `delta` is converted to the same data type. For regular
2204 images, `delta` should be in the range `(-1,1)`, as it is added to the image
2205 in floating point representation, where pixel values are in the `[0,1)` range.
2207 Usage Example:
2209 >>> x = [[[1.0, 2.0, 3.0],
2210 ... [4.0, 5.0, 6.0]],
2211 ... [[7.0, 8.0, 9.0],
2212 ... [10.0, 11.0, 12.0]]]
2213 >>> tf.image.adjust_brightness(x, delta=0.1)
2214 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2215 array([[[ 1.1, 2.1, 3.1],
2216 [ 4.1, 5.1, 6.1]],
2217 [[ 7.1, 8.1, 9.1],
2218 [10.1, 11.1, 12.1]]], dtype=float32)>
2220 Args:
2221 image: RGB image or images to adjust.
2222 delta: A scalar. Amount to add to the pixel values.
2224 Returns:
2225 A brightness-adjusted tensor of the same shape and type as `image`.
2226 """
2227 with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name:
2228 image = ops.convert_to_tensor(image, name='image')
2229 # Remember original dtype so we can convert back if needed
2230 orig_dtype = image.dtype
2232 if orig_dtype in [dtypes.float16, dtypes.float32]:
2233 flt_image = image
2234 else:
2235 flt_image = convert_image_dtype(image, dtypes.float32)
2237 adjusted = math_ops.add(
2238 flt_image, math_ops.cast(delta, flt_image.dtype), name=name)
2240 return convert_image_dtype(adjusted, orig_dtype, saturate=True)
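# Illustrative sketch (not part of the original module): the dtype round-trip
# described in the docstring above, on a uint8 image. Values are scaled to
# [0, 1) floats, shifted by `delta`, and saturate-cast back, so bright pixels
# clip at 255 rather than wrapping. The helper name is hypothetical.
def _example_adjust_brightness_uint8():
  import tensorflow as tf
  x = tf.constant([[[250, 0, 128]]], dtype=tf.uint8)
  return tf.image.adjust_brightness(x, 0.1)  # 250 saturates to 255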
2243@tf_export('image.adjust_contrast')
2244@dispatch.add_dispatch_support
2245def adjust_contrast(images, contrast_factor):
2246 """Adjust contrast of RGB or grayscale images.
2248 This is a convenience method that converts RGB images to float
2249 representation, adjusts their contrast, and then converts them back to the
2250 original data type. If several adjustments are chained, it is advisable to
2251 minimize the number of redundant conversions.
2253 `images` is a tensor of at least 3 dimensions. The last 3 dimensions are
2254 interpreted as `[height, width, channels]`. The other dimensions only
2255 represent a collection of images, such as `[batch, height, width, channels].`
2257 Contrast is adjusted independently for each channel of each image.
2259 For each channel, this Op computes the mean of the image pixels in the
2260 channel and then adjusts each component `x` of each pixel to
2261 `(x - mean) * contrast_factor + mean`.
2263 `contrast_factor` must be in the interval `(-inf, inf)`.
2265 Usage Example:
2267 >>> x = [[[1.0, 2.0, 3.0],
2268 ... [4.0, 5.0, 6.0]],
2269 ... [[7.0, 8.0, 9.0],
2270 ... [10.0, 11.0, 12.0]]]
2271 >>> tf.image.adjust_contrast(x, 2.)
2272 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2273 array([[[-3.5, -2.5, -1.5],
2274 [ 2.5, 3.5, 4.5]],
2275 [[ 8.5, 9.5, 10.5],
2276 [14.5, 15.5, 16.5]]], dtype=float32)>
2278 Args:
2279 images: Images to adjust. At least 3-D.
2280 contrast_factor: A float multiplier for adjusting contrast.
2282 Returns:
2283 The contrast-adjusted image or images.
2284 """
2285 with ops.name_scope(None, 'adjust_contrast',
2286 [images, contrast_factor]) as name:
2287 images = ops.convert_to_tensor(images, name='images')
2288 # Remember original dtype so we can convert back if needed
2289 orig_dtype = images.dtype
2291 if orig_dtype in (dtypes.float16, dtypes.float32):
2292 flt_images = images
2293 else:
2294 flt_images = convert_image_dtype(images, dtypes.float32)
2296 adjusted = gen_image_ops.adjust_contrastv2(
2297 flt_images, contrast_factor=contrast_factor, name=name)
2299 return convert_image_dtype(adjusted, orig_dtype, saturate=True)
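# Illustrative sketch (not part of the original module): a NumPy cross-check
# of the per-channel formula documented above,
# `(x - mean) * contrast_factor + mean`. The helper name is hypothetical.
def _check_adjust_contrast_formula():
  import numpy as np
  import tensorflow as tf
  x = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
  y = tf.image.adjust_contrast(x, 2.0).numpy()
  mean = x.mean(axis=(0, 1), keepdims=True)  # one mean per channel
  np.testing.assert_allclose(y, (x - mean) * 2.0 + mean, rtol=1e-5)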
2302@tf_export('image.adjust_gamma')
2303@dispatch.register_unary_elementwise_api
2304@dispatch.add_dispatch_support
2305def adjust_gamma(image, gamma=1, gain=1):
2306 """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction).
2308 on the input image.
2310 Also known as Power Law Transform. This function converts the
2311 input images first to float representation, then transforms them
2312 pixelwise according to the equation `Out = gain * In**gamma`,
2313 and then converts them back to the original data type.
2315 Usage Example:
2317 >>> x = [[[1.0, 2.0, 3.0],
2318 ... [4.0, 5.0, 6.0]],
2319 ... [[7.0, 8.0, 9.0],
2320 ... [10.0, 11.0, 12.0]]]
2321 >>> tf.image.adjust_gamma(x, 0.2)
2322 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2323 array([[[1. , 1.1486983, 1.2457309],
2324 [1.319508 , 1.3797297, 1.4309691]],
2325 [[1.4757731, 1.5157166, 1.5518456],
2326 [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)>
2328 Args:
2329 image: RGB image or images to adjust.
2330 gamma: A scalar or tensor. Non-negative real number.
2331 gain: A scalar or tensor. The constant multiplier.
2333 Returns:
2334 A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`.
2336 Raises:
2337 ValueError: If gamma is negative.
2338 Notes:
2339 For gamma greater than 1, the histogram will shift towards the left and
2340 the output image will be darker than the input image.
2341 For gamma less than 1, the histogram will shift towards the right and
2342 the output image will be brighter than the input image.
2343 References:
2344 [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction)
2345 """
2347 with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name:
2348 image = ops.convert_to_tensor(image, name='image')
2349 # Remember original dtype so we can convert back if needed
2350 orig_dtype = image.dtype
2352 if orig_dtype in [dtypes.float16, dtypes.float32]:
2353 flt_image = image
2354 else:
2355 flt_image = convert_image_dtype(image, dtypes.float32)
2357 assert_op = _assert(gamma >= 0, ValueError,
2358 'Gamma should be a non-negative real number.')
2359 if assert_op:
2360 gamma = control_flow_ops.with_dependencies(assert_op, gamma)
2362 # According to the definition of gamma correction.
2363 adjusted_img = gain * flt_image**gamma
2365 return convert_image_dtype(adjusted_img, orig_dtype, saturate=True)
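# Illustrative sketch (not part of the original module): the
# `Out = gain * In**gamma` transform above on float inputs in [0, 1], where
# gamma > 1 darkens the image as the Notes section says. The helper name is
# hypothetical.
def _example_adjust_gamma_darken():
  import tensorflow as tf
  x = tf.constant([[[0.25, 0.5, 0.75]]], dtype=tf.float32)
  return tf.image.adjust_gamma(x, gamma=2.0)  # [[[0.0625, 0.25, 0.5625]]]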
2368@tf_export('image.convert_image_dtype')
2369@dispatch.register_unary_elementwise_api
2370@dispatch.add_dispatch_support
2371def convert_image_dtype(image, dtype, saturate=False, name=None):
2372 """Convert `image` to `dtype`, scaling its values if needed.
2374 The operation supports data types (for `image` and `dtype`) of
2375 `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`,
2376 `float16`, `float32`, `float64`, `bfloat16`.
2378 Images that are represented using floating point values are expected to have
2379 values in the range `[0,1)`. Image data stored in integer data types are
2380 expected to have values in the range `[0,MAX]`, where `MAX` is the largest
2381 positive representable number for the data type.
2383 This op converts between data types, scaling the values appropriately before
2384 casting.
2386 Usage Example:
2388 >>> x = [[[1, 2, 3], [4, 5, 6]],
2389 ... [[7, 8, 9], [10, 11, 12]]]
2390 >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8)
2391 >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False)
2392 <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy=
2393 array([[[0.00787, 0.01575, 0.02362],
2394 [0.0315 , 0.03937, 0.04724]],
2395 [[0.0551 , 0.063 , 0.07086],
2396 [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)>
2398 Converting integer types to floating point types returns normalized floating
2399 point values in the range [0, 1); the values are normalized by the `MAX` value
2400 of the input dtype. Consider the following two examples:
2402 >>> a = [[[1], [2]], [[3], [4]]]
2403 >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8)
2404 >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32)
2405 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2406 array([[[0.00787402],
2407 [0.01574803]],
2408 [[0.02362205],
2409 [0.03149606]]], dtype=float32)>
2411 >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32)
2412 >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32)
2413 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2414 array([[[4.6566129e-10],
2415 [9.3132257e-10]],
2416 [[1.3969839e-09],
2417 [1.8626451e-09]]], dtype=float32)>
2419 Despite having identical values of `a` and output dtype of `float32`, the
2420 outputs differ due to the different input dtypes (`int8` vs. `int32`). This
2421 is, again, because the values are normalized by the `MAX` value of the input
2422 dtype.
2424 Note that converting floating point values to integer type may lose precision.
2425 In the example below, an image tensor `b` of dtype `float32` is converted to
2426 `int8` and back to `float32`. The final output, however, is different from
2427 the original input `b` due to precision loss.
2429 >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]]
2430 >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32)
2431 >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8)
2432 >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32)
2433 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2434 array([[[0.11811024],
2435 [0.33858266]],
2436 [[0.5590551 ],
2437 [0.77952754]]], dtype=float32)>
2439 Scaling up from an integer type (input dtype) to another integer type (output
2440 dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting
2441 back and forth should result in no change. For example, as shown below, the
2442 `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767)
2443 but, when scaled back, we get the same, original values of `c`.
2445 >>> c = [[[1], [2]], [[127], [127]]]
2446 >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8)
2447 >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16)
2448 >>> print(c_int16)
2449 tf.Tensor(
2450 [[[ 256]
2451 [ 512]]
2452 [[32512]
2453 [32512]]], shape=(2, 2, 1), dtype=int16)
2454 >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8)
2455 >>> print(c_int8_back)
2456 tf.Tensor(
2457 [[[ 1]
2458 [ 2]]
2459 [[127]
2460 [127]]], shape=(2, 2, 1), dtype=int8)
2462 Scaling down from an integer type to another integer type can be a lossy
2463 conversion. Notice in the example below that converting `int16` to `uint8` and
2464 back to `int16` has lost precision.
2466 >>> d = [[[1000], [2000]], [[3000], [4000]]]
2467 >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16)
2468 >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8)
2469 >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16)
2470 >>> print(d_int16_back)
2471 tf.Tensor(
2472 [[[ 896]
2473 [1920]]
2474 [[2944]
2475 [3968]]], shape=(2, 2, 1), dtype=int16)
2477 Note that converting from floating point inputs to integer types may lead to
2478 over/underflow problems. Set `saturate` to `True` to avoid such problems in
2479 problematic conversions. If enabled, saturation will clip the output into the
2480 allowed range before performing a potentially dangerous cast (and only before
2481 performing such a cast, i.e., when casting from a floating point to an integer
2482 type, and when casting from a signed to an unsigned type; `saturate` has no
2483 effect on casts between floats, or on casts that increase the type's range).
2485 Args:
2486 image: An image.
2487 dtype: A `DType` to convert `image` to.
2488 saturate: If `True`, clip the input before casting (if necessary).
2489 name: A name for this operation (optional).
2491 Returns:
2492 `image`, converted to `dtype`.
2494 Raises:
2495 AttributeError: Raises an attribute error when dtype is neither
2496 float nor integer.
2497 """
2498 image = ops.convert_to_tensor(image, name='image')
2499 dtype = dtypes.as_dtype(dtype)
2500 if not dtype.is_floating and not dtype.is_integer:
2501 raise AttributeError('dtype must be either floating point or integer')
2502 if not image.dtype.is_floating and not image.dtype.is_integer:
2503 raise AttributeError('image dtype must be either floating point or integer')
2504 if dtype == image.dtype:
2505 return array_ops.identity(image, name=name)
2507 with ops.name_scope(name, 'convert_image', [image]) as name:
2508 # Both integer: use integer multiplication in the larger range
2509 if image.dtype.is_integer and dtype.is_integer:
2510 scale_in = image.dtype.max
2511 scale_out = dtype.max
2512 if scale_in > scale_out:
2513 # Scaling down, scale first, then cast. The scaling factor will
2514 # cause in.max to be mapped to above out.max but below out.max+1,
2515 # so that the output is safely in the supported range.
2516 scale = (scale_in + 1) // (scale_out + 1)
2517 scaled = math_ops.floordiv(image, scale)
2519 if saturate:
2520 return math_ops.saturate_cast(scaled, dtype, name=name)
2521 else:
2522 return math_ops.cast(scaled, dtype, name=name)
2523 else:
2524 # Scaling up, cast first, then scale. The scale will not map in.max to
2525 # out.max, but converting back and forth should result in no change.
2526 if saturate:
2527 cast = math_ops.saturate_cast(image, dtype)
2528 else:
2529 cast = math_ops.cast(image, dtype)
2530 scale = (scale_out + 1) // (scale_in + 1)
2531 return math_ops.multiply(cast, scale, name=name)
2532 elif image.dtype.is_floating and dtype.is_floating:
2533 # Both float: Just cast, no possible overflows in the allowed ranges.
2534 # Note: We're ignoring float overflows. If your image dynamic range
2535 # exceeds float range, you're on your own.
2536 return math_ops.cast(image, dtype, name=name)
2537 else:
2538 if image.dtype.is_integer:
2539 # Converting to float: first cast, then scale. No saturation possible.
2540 cast = math_ops.cast(image, dtype)
2541 scale = 1. / image.dtype.max
2542 return math_ops.multiply(cast, scale, name=name)
2543 else:
2544 # Converting from float: first scale, then cast
2545 scale = dtype.max + 0.5 # avoid rounding problems in the cast
2546 scaled = math_ops.multiply(image, scale)
2547 if saturate:
2548 return math_ops.saturate_cast(scaled, dtype, name=name)
2549 else:
2550 return math_ops.cast(scaled, dtype, name=name)
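# Illustrative sketch (not part of the original module): the integer-to-
# integer scaling rule above, spelled out for int8 -> int16. Scaling up
# multiplies by (out.max + 1) // (in.max + 1) = 32768 // 128 = 256, so
# in.max (127) maps to 32512, not out.max. The helper name is hypothetical.
def _check_int_to_int_scale():
  import tensorflow as tf
  x = tf.constant([[[1], [127]]], dtype=tf.int8)
  return tf.image.convert_image_dtype(x, tf.int16)  # [[[256], [32512]]]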
2553@tf_export('image.rgb_to_grayscale')
2554@dispatch.add_dispatch_support
2555def rgb_to_grayscale(images, name=None):
2556 """Converts one or more images from RGB to Grayscale.
2558 Outputs a tensor of the same `DType` and rank as `images`. The size of the
2559 last dimension of the output is 1, containing the Grayscale value of the
2560 pixels.
2562 >>> original = tf.constant([[[1.0, 2.0, 3.0]]])
2563 >>> converted = tf.image.rgb_to_grayscale(original)
2564 >>> print(converted.numpy())
2565 [[[1.81...]]]
2567 Args:
2568 images: The RGB tensor to convert. The last dimension must have size 3 and
2569 should contain RGB values.
2570 name: A name for the operation (optional).
2572 Returns:
2573 The converted grayscale image(s).
2574 """
2575 with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name:
2576 images = ops.convert_to_tensor(images, name='images')
2577 # Remember original dtype so we can convert back if needed
2578 orig_dtype = images.dtype
2579 flt_image = convert_image_dtype(images, dtypes.float32)
2581 # Reference for converting between RGB and grayscale.
2582 # https://en.wikipedia.org/wiki/Luma_%28video%29
2583 rgb_weights = [0.2989, 0.5870, 0.1140]
2584 gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1])
2585 gray_float = array_ops.expand_dims(gray_float, -1)
2586 return convert_image_dtype(gray_float, orig_dtype, name=name)
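# Illustrative sketch (not part of the original module): a NumPy cross-check
# of the luma weights used above (0.2989 R + 0.5870 G + 0.1140 B). The
# helper name is hypothetical.
def _check_rgb_to_grayscale_weights():
  import numpy as np
  import tensorflow as tf
  rgb = np.array([[[0.2, 0.4, 0.6]]], dtype=np.float32)
  gray = tf.image.rgb_to_grayscale(rgb).numpy()
  expected = 0.2989 * 0.2 + 0.5870 * 0.4 + 0.1140 * 0.6
  np.testing.assert_allclose(gray, [[[expected]]], rtol=1e-5)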
2589@tf_export('image.grayscale_to_rgb')
2590@dispatch.add_dispatch_support
2591def grayscale_to_rgb(images, name=None):
2592 """Converts one or more images from Grayscale to RGB.
2594 Outputs a tensor of the same `DType` and rank as `images`. The size of the
2595 last dimension of the output is 3, containing the RGB value of the pixels.
2596 The input images' last dimension must be size 1.
2598 >>> original = tf.constant([[[1.0], [2.0], [3.0]]])
2599 >>> converted = tf.image.grayscale_to_rgb(original)
2600 >>> print(converted.numpy())
2601 [[[1. 1. 1.]
2602 [2. 2. 2.]
2603 [3. 3. 3.]]]
2605 Args:
2606 images: The Grayscale tensor to convert. The last dimension must be size 1.
2607 name: A name for the operation (optional).
2609 Returns:
2610 The converted RGB image(s).
2611 """
2612 with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name:
2613 images = _AssertGrayscaleImage(images)
2615 images = ops.convert_to_tensor(images, name='images')
2616 rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
2617 shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] +
2618 [array_ops.expand_dims(3, 0)])
2619 multiples = array_ops.concat(shape_list, 0)
2620 rgb = array_ops.tile(images, multiples, name=name)
2621 rgb.set_shape(images.get_shape()[:-1].concatenate([3]))
2622 return rgb
2625# pylint: disable=invalid-name
2626@tf_export('image.random_hue')
2627@dispatch.add_dispatch_support
2628def random_hue(image, max_delta, seed=None):
2629 """Adjust the hue of RGB images by a random factor.
2631 Equivalent to `adjust_hue()` but uses a `delta` randomly
2632 picked in the interval `[-max_delta, max_delta)`.
2634 `max_delta` must be in the interval `[0, 0.5]`.
2636 Usage Example:
2638 >>> x = [[[1.0, 2.0, 3.0],
2639 ... [4.0, 5.0, 6.0]],
2640 ... [[7.0, 8.0, 9.0],
2641 ... [10.0, 11.0, 12.0]]]
2642 >>> tf.image.random_hue(x, 0.2)
2643 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2645 For producing deterministic results given a `seed` value, use
2646 `tf.image.stateless_random_hue`. Unlike using the `seed` param with
2647 `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same
2648 results given the same seed independent of how many times the function is
2649 called, and independent of global seed settings (e.g. tf.random.set_seed).
2651 Args:
2652 image: RGB image or images. The size of the last dimension must be 3.
2653 max_delta: float. The maximum value for the random delta.
2654 seed: An operation-specific seed. It will be used in conjunction with the
2655 graph-level seed to determine the real seeds that will be used in this
2656 operation. Please see the documentation of set_random_seed for its
2657 interaction with the graph-level random seed.
2659 Returns:
2660 Adjusted image(s), same shape and DType as `image`.
2662 Raises:
2663 ValueError: if `max_delta` is invalid.
2664 """
2665 if max_delta > 0.5:
2666 raise ValueError('max_delta must be <= 0.5.')
2668 if max_delta < 0:
2669 raise ValueError('max_delta must be non-negative.')
2671 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
2672 return adjust_hue(image, delta)
2675@tf_export('image.stateless_random_hue', v1=[])
2676@dispatch.add_dispatch_support
2677def stateless_random_hue(image, max_delta, seed):
2678 """Adjust the hue of RGB images by a random factor deterministically.
2680 Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the
2681 interval `[-max_delta, max_delta)`.
2683 Guarantees the same results given the same `seed` independent of how many
2684 times the function is called, and independent of global seed settings (e.g.
2685 `tf.random.set_seed`).
2687 `max_delta` must be in the interval `[0, 0.5]`.
2689 Usage Example:
2691 >>> x = [[[1.0, 2.0, 3.0],
2692 ... [4.0, 5.0, 6.0]],
2693 ... [[7.0, 8.0, 9.0],
2694 ... [10.0, 11.0, 12.0]]]
2695 >>> seed = (1, 2)
2696 >>> tf.image.stateless_random_hue(x, 0.2, seed)
2697 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2698 array([[[ 1.6514902, 1. , 3. ],
2699 [ 4.65149 , 4. , 6. ]],
2700 [[ 7.65149 , 7. , 9. ],
2701 [10.65149 , 10. , 12. ]]], dtype=float32)>
2703 Args:
2704 image: RGB image or images. The size of the last dimension must be 3.
2705 max_delta: float. The maximum value for the random delta.
2706 seed: A shape [2] Tensor, the seed to the random number generator. Must have
2707 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2709 Returns:
2710 Adjusted image(s), same shape and DType as `image`.
2712 Raises:
2713 ValueError: if `max_delta` is invalid.
2714 """
2715 if max_delta > 0.5:
2716 raise ValueError('max_delta must be <= 0.5.')
2718 if max_delta < 0:
2719 raise ValueError('max_delta must be non-negative.')
2721 delta = stateless_random_ops.stateless_random_uniform(
2722 shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
2723 return adjust_hue(image, delta)
2726@tf_export('image.adjust_hue')
2727@dispatch.add_dispatch_support
2728def adjust_hue(image, delta, name=None):
2729 """Adjust hue of RGB images.
2731 This is a convenience method that converts an RGB image to float
2732 representation, converts it to HSV, adds an offset to the
2733 hue channel, converts back to RGB and then back to the original
2734 data type. If several adjustments are chained it is advisable to minimize
2735 the number of redundant conversions.
2737 `image` is an RGB image. The image hue is adjusted by converting the
2738 image(s) to HSV and rotating the hue channel (H) by
2739 `delta`. The image is then converted back to RGB.
2741 `delta` must be in the interval `[-1, 1]`.
2743 Usage Example:
2745 >>> x = [[[1.0, 2.0, 3.0],
2746 ... [4.0, 5.0, 6.0]],
2747 ... [[7.0, 8.0, 9.0],
2748 ... [10.0, 11.0, 12.0]]]
2749 >>> tf.image.adjust_hue(x, 0.2)
2750 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2751 array([[[ 2.3999996, 1. , 3. ],
2752 [ 5.3999996, 4. , 6. ]],
2753 [[ 8.4 , 7. , 9. ],
2754 [11.4 , 10. , 12. ]]], dtype=float32)>
2756 Args:
2757 image: RGB image or images. The size of the last dimension must be 3.
2758 delta: float. How much to add to the hue channel.
2759 name: A name for this operation (optional).
2761 Returns:
2762 Adjusted image(s), same shape and DType as `image`.
2764 Raises:
2765 InvalidArgumentError: image must have at least 3 dimensions.
2766 InvalidArgumentError: The size of the last dimension must be 3.
2767 ValueError: if `delta` is not in the interval of `[-1, 1]`.
2769 Usage Example:
2771 >>> image = [[[1, 2, 3], [4, 5, 6]],
2772 ... [[7, 8, 9], [10, 11, 12]],
2773 ... [[13, 14, 15], [16, 17, 18]]]
2774 >>> image = tf.constant(image)
2775 >>> tf.image.adjust_hue(image, 0.2)
2776 <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
2777 array([[[ 2, 1, 3],
2778 [ 5, 4, 6]],
2779 [[ 8, 7, 9],
2780 [11, 10, 12]],
2781 [[14, 13, 15],
2782 [17, 16, 18]]], dtype=int32)>
2783 """
2784 with ops.name_scope(name, 'adjust_hue', [image]) as name:
2785 if context.executing_eagerly():
2786 if delta < -1 or delta > 1:
2787 raise ValueError('delta must be in the interval [-1, 1]')
2788 image = ops.convert_to_tensor(image, name='image')
2789 # Remember original dtype so we can convert back if needed
2790 orig_dtype = image.dtype
2791 if orig_dtype in (dtypes.float16, dtypes.float32):
2792 flt_image = image
2793 else:
2794 flt_image = convert_image_dtype(image, dtypes.float32)
2796 rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
2798 return convert_image_dtype(rgb_altered, orig_dtype)
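# Illustrative sketch (not part of the original module): a consequence of the
# RGB -> HSV -> RGB pipeline above. A pixel with R == G == B has zero
# saturation, so rotating its hue channel is a no-op. The helper name is
# hypothetical.
def _example_adjust_hue_gray_pixel():
  import tensorflow as tf
  gray = tf.constant([[[0.5, 0.5, 0.5]]], dtype=tf.float32)
  return tf.image.adjust_hue(gray, 0.3)  # still [[[0.5, 0.5, 0.5]]]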
2801# pylint: disable=invalid-name
2802@tf_export('image.random_jpeg_quality')
2803@dispatch.add_dispatch_support
2804def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
2805 """Randomly changes jpeg encoding quality for inducing jpeg noise.
2807 `min_jpeg_quality` must be in the interval `[0, 100]` and less than
2808 `max_jpeg_quality`.
2809 `max_jpeg_quality` must be in the interval `[0, 100]`.
2811 Usage Example:
2813 >>> x = tf.constant([[[1, 2, 3],
2814 ... [4, 5, 6]],
2815 ... [[7, 8, 9],
2816 ... [10, 11, 12]]], dtype=tf.uint8)
2817 >>> tf.image.random_jpeg_quality(x, 75, 95)
2818 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=...>
2820 For producing deterministic results given a `seed` value, use
2821 `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param
2822 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2823 same results given the same seed independent of how many times the function is
2824 called, and independent of global seed settings (e.g. tf.random.set_seed).
2826 Args:
2827 image: 3D image. Size of the last dimension must be 1 or 3.
2828 min_jpeg_quality: Minimum jpeg encoding quality to use.
2829 max_jpeg_quality: Maximum jpeg encoding quality to use.
2830 seed: An operation-specific seed. It will be used in conjunction with the
2831 graph-level seed to determine the real seeds that will be used in this
2832 operation. Please see the documentation of set_random_seed for its
2833 interaction with the graph-level random seed.
2835 Returns:
2836 Adjusted image(s), same shape and DType as `image`.
2838 Raises:
2839 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
2840 """
2841 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
2842 max_jpeg_quality > 100):
2843 raise ValueError('jpeg encoding range must be between 0 and 100.')
2845 if min_jpeg_quality >= max_jpeg_quality:
2846 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')
2848 jpeg_quality = random_ops.random_uniform([],
2849 min_jpeg_quality,
2850 max_jpeg_quality,
2851 seed=seed,
2852 dtype=dtypes.int32)
2853 return adjust_jpeg_quality(image, jpeg_quality)
2856@tf_export('image.stateless_random_jpeg_quality', v1=[])
2857@dispatch.add_dispatch_support
2858def stateless_random_jpeg_quality(image,
2859 min_jpeg_quality,
2860 max_jpeg_quality,
2861 seed):
2862 """Deterministically radomize jpeg encoding quality for inducing jpeg noise.
2864 Guarantees the same results given the same `seed` independent of how many
2865 times the function is called, and independent of global seed settings (e.g.
2866 `tf.random.set_seed`).
2868 `min_jpeg_quality` must be in the interval `[0, 100]` and less than
2869 `max_jpeg_quality`.
2870 `max_jpeg_quality` must be in the interval `[0, 100]`.
2872 Usage Example:
2874 >>> x = tf.constant([[[1, 2, 3],
2875 ... [4, 5, 6]],
2876 ... [[7, 8, 9],
2877 ... [10, 11, 12]]], dtype=tf.uint8)
2878 >>> seed = (1, 2)
2879 >>> tf.image.stateless_random_jpeg_quality(x, 75, 95, seed)
2880 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
2881 array([[[ 0, 4, 5],
2882 [ 1, 5, 6]],
2883 [[ 5, 9, 10],
2884 [ 5, 9, 10]]], dtype=uint8)>
2886 Args:
2887 image: 3D image. Size of the last dimension must be 1 or 3.
2888 min_jpeg_quality: Minimum jpeg encoding quality to use.
2889 max_jpeg_quality: Maximum jpeg encoding quality to use.
2890 seed: A shape [2] Tensor, the seed to the random number generator. Must have
2891 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2893 Returns:
2894 Adjusted image(s), same shape and DType as `image`.
2896 Raises:
2897 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
2898 """
2899 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
2900 max_jpeg_quality > 100):
2901 raise ValueError('jpeg encoding range must be between 0 and 100.')
2903 if min_jpeg_quality >= max_jpeg_quality:
2904 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')
2906 jpeg_quality = stateless_random_ops.stateless_random_uniform(
2907 shape=[], minval=min_jpeg_quality, maxval=max_jpeg_quality, seed=seed,
2908 dtype=dtypes.int32)
2909 return adjust_jpeg_quality(image, jpeg_quality)
2912@tf_export('image.adjust_jpeg_quality')
2913@dispatch.add_dispatch_support
2914def adjust_jpeg_quality(image, jpeg_quality, name=None):
2915 """Adjust jpeg encoding quality of an image.
2917 This is a convenience method that converts an image to uint8 representation,
2918 encodes it to jpeg with `jpeg_quality`, decodes it, and then converts back
2919 to the original data type.
2921 `jpeg_quality` must be in the interval `[0, 100]`.
2923 Usage Examples:
2925 >>> x = [[[0.01, 0.02, 0.03],
2926 ... [0.04, 0.05, 0.06]],
2927 ... [[0.07, 0.08, 0.09],
2928 ... [0.10, 0.11, 0.12]]]
2929 >>> x_jpeg = tf.image.adjust_jpeg_quality(x, 75)
2930 >>> x_jpeg.numpy()
2931 array([[[0.00392157, 0.01960784, 0.03137255],
2932 [0.02745098, 0.04313726, 0.05490196]],
2933 [[0.05882353, 0.07450981, 0.08627451],
2934 [0.08235294, 0.09803922, 0.10980393]]], dtype=float32)
2936 Note that floating point values are expected to have values in the range
2937 [0,1) and values outside this range are clipped.
2939 >>> x = [[[1.0, 2.0, 3.0],
2940 ... [4.0, 5.0, 6.0]],
2941 ... [[7.0, 8.0, 9.0],
2942 ... [10.0, 11.0, 12.0]]]
2943 >>> tf.image.adjust_jpeg_quality(x, 75)
2944 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2945 array([[[1., 1., 1.],
2946 [1., 1., 1.]],
2947 [[1., 1., 1.],
2948 [1., 1., 1.]]], dtype=float32)>
2950 Note that `jpeg_quality` 100 is still lossy compression.
2952 >>> x = tf.constant([[[1, 2, 3],
2953 ... [4, 5, 6]],
2954 ... [[7, 8, 9],
2955 ... [10, 11, 12]]], dtype=tf.uint8)
2956 >>> tf.image.adjust_jpeg_quality(x, 100)
2957 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
2958 array([[[ 0, 1, 3],
2959 [ 3, 4, 6]],
2960 [[ 6, 7, 9],
2961 [ 9, 10, 12]]], dtype=uint8)>
2963 Args:
2964 image: 3D image. The size of the last dimension must be None, 1 or 3.
2965 jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality.
2966 name: A name for this operation (optional).
2968 Returns:
2969 Adjusted image, same shape and DType as `image`.
2971 Raises:
2972 InvalidArgumentError: quality must be in [0,100]
2973 InvalidArgumentError: image must have 1 or 3 channels
2974 """
2975 with ops.name_scope(name, 'adjust_jpeg_quality', [image]):
2976 image = ops.convert_to_tensor(image, name='image')
2977 channels = image.shape.as_list()[-1]
2978 # Remember original dtype so we can convert back if needed
2979 orig_dtype = image.dtype
2980 image = convert_image_dtype(image, dtypes.uint8, saturate=True)
2981 if not _is_tensor(jpeg_quality):
2982 # If jpeg_quality is an int (not a tensor).
2983 jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32)
2984 image = gen_image_ops.encode_jpeg_variable_quality(image, jpeg_quality)
2986 image = gen_image_ops.decode_jpeg(image, channels=channels)
2987 return convert_image_dtype(image, orig_dtype, saturate=True)
2990@tf_export('image.random_saturation')
2991@dispatch.add_dispatch_support
2992def random_saturation(image, lower, upper, seed=None):
2993 """Adjust the saturation of RGB images by a random factor.
2995 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
2996 picked in the interval `[lower, upper)`.
2998 Usage Example:
3000 >>> x = [[[1.0, 2.0, 3.0],
3001 ... [4.0, 5.0, 6.0]],
3002 ... [[7.0, 8.0, 9.0],
3003 ... [10.0, 11.0, 12.0]]]
3004 >>> tf.image.random_saturation(x, 5, 10)
3005 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3006 array([[[ 0. , 1.5, 3. ],
3007 [ 0. , 3. , 6. ]],
3008 [[ 0. , 4.5, 9. ],
3009 [ 0. , 6. , 12. ]]], dtype=float32)>
3011 For producing deterministic results given a `seed` value, use
3012 `tf.image.stateless_random_saturation`. Unlike using the `seed` param
3013 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
3014 same results given the same seed independent of how many times the function is
3015 called, and independent of global seed settings (e.g. tf.random.set_seed).
3017 Args:
3018 image: RGB image or images. The size of the last dimension must be 3.
3019 lower: float. Lower bound for the random saturation factor.
3020 upper: float. Upper bound for the random saturation factor.
3021 seed: An operation-specific seed. It will be used in conjunction with the
3022 graph-level seed to determine the real seeds that will be used in this
3023 operation. Please see the documentation of set_random_seed for its
3024 interaction with the graph-level random seed.
3026 Returns:
3027 Adjusted image(s), same shape and DType as `image`.
3029 Raises:
3030 ValueError: if `upper <= lower` or if `lower < 0`.
3031 """
3032 if upper <= lower:
3033 raise ValueError('upper must be > lower.')
3035 if lower < 0:
3036 raise ValueError('lower must be non-negative.')
3038 saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed)
3039 return adjust_saturation(image, saturation_factor)
3042@tf_export('image.stateless_random_saturation', v1=[])
3043@dispatch.add_dispatch_support
3044def stateless_random_saturation(image, lower, upper, seed=None):
3045 """Adjust the saturation of RGB images by a random factor deterministically.
3047 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
3048 picked in the interval `[lower, upper)`.
3050 Guarantees the same results given the same `seed` independent of how many
3051 times the function is called, and independent of global seed settings (e.g.
3052 `tf.random.set_seed`).
3054 Usage Example:
3056 >>> x = [[[1.0, 2.0, 3.0],
3057 ... [4.0, 5.0, 6.0]],
3058 ... [[7.0, 8.0, 9.0],
3059 ... [10.0, 11.0, 12.0]]]
3060 >>> seed = (1, 2)
3061 >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed)
3062 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3063 array([[[ 1.1559395, 2.0779698, 3. ],
3064 [ 4.1559396, 5.07797 , 6. ]],
3065 [[ 7.1559396, 8.07797 , 9. ],
3066 [10.155939 , 11.07797 , 12. ]]], dtype=float32)>
3068 Args:
3069 image: RGB image or images. The size of the last dimension must be 3.
3070 lower: float. Lower bound for the random saturation factor.
3071 upper: float. Upper bound for the random saturation factor.
3072 seed: A shape [2] Tensor, the seed to the random number generator. Must have
3073 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
3075 Returns:
3076 Adjusted image(s), same shape and DType as `image`.
3078 Raises:
3079 ValueError: if `upper <= lower` or if `lower < 0`.
3080 """
3081 if upper <= lower:
3082 raise ValueError('upper must be > lower.')
3084 if lower < 0:
3085 raise ValueError('lower must be non-negative.')
3087 saturation_factor = stateless_random_ops.stateless_random_uniform(
3088 shape=[], minval=lower, maxval=upper, seed=seed)
3089 return adjust_saturation(image, saturation_factor)
3092@tf_export('image.adjust_saturation')
3093@dispatch.add_dispatch_support
3094def adjust_saturation(image, saturation_factor, name=None):
3095 """Adjust saturation of RGB images.
3097 This is a convenience method that converts RGB images to float
3098 representation, converts them to HSV, adds an offset to the
3099 saturation channel, converts back to RGB and then back to the original
3100 data type. If several adjustments are chained it is advisable to minimize
3101 the number of redundant conversions.
3103 `image` is an RGB image or images. The image saturation is adjusted by
3104 converting the images to HSV and multiplying the saturation (S) channel by
3105 `saturation_factor` and clipping. The images are then converted back to RGB.
3107 `saturation_factor` must be in the interval `[0, inf)`.
3109 Usage Example:
3111 >>> x = [[[1.0, 2.0, 3.0],
3112 ... [4.0, 5.0, 6.0]],
3113 ... [[7.0, 8.0, 9.0],
3114 ... [10.0, 11.0, 12.0]]]
3115 >>> tf.image.adjust_saturation(x, 0.5)
3116 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3117 array([[[ 2. , 2.5, 3. ],
3118 [ 5. , 5.5, 6. ]],
3119 [[ 8. , 8.5, 9. ],
3120 [11. , 11.5, 12. ]]], dtype=float32)>
3122 Args:
3123 image: RGB image or images. The size of the last dimension must be 3.
3124 saturation_factor: float. Factor to multiply the saturation by.
3125 name: A name for this operation (optional).
3127 Returns:
3128 Adjusted image(s), same shape and DType as `image`.
3130 Raises:
3131 InvalidArgumentError: input must have 3 channels
3132 """
3133 with ops.name_scope(name, 'adjust_saturation', [image]) as name:
3134 image = ops.convert_to_tensor(image, name='image')
3135 # Remember original dtype so we can convert back if needed
3136 orig_dtype = image.dtype
3137 if orig_dtype in (dtypes.float16, dtypes.float32):
3138 flt_image = image
3139 else:
3140 flt_image = convert_image_dtype(image, dtypes.float32)
3142 adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor)
3144 return convert_image_dtype(adjusted, orig_dtype)
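

# Editor's note: a sketch (not part of the original module) showing that
# adjust_saturation matches manually scaling the S channel in HSV space, per
# the docstring above. Assumes TF 2.x eager execution; the helper name and
# the tolerance are ours.
def _example_adjust_saturation_matches_hsv_scaling():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  rgb = tf.random.uniform([4, 4, 3])
  direct = tf.image.adjust_saturation(rgb, 0.5)
  h, s, v = tf.unstack(tf.image.rgb_to_hsv(rgb), axis=-1)
  manual = tf.image.hsv_to_rgb(
      tf.stack([h, tf.clip_by_value(s * 0.5, 0.0, 1.0), v], axis=-1))
  # Both paths agree up to float rounding.
  tf.debugging.assert_near(direct, manual, atol=1e-3)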


@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg'])
def is_jpeg(contents, name=None):
  r"""Convenience function to check if the 'contents' encodes a JPEG image.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
    A scalar boolean tensor indicating if 'contents' may be a JPEG image.
    is_jpeg is susceptible to false positives.
  """
  # Normal JPEGs start with \xff\xd8\xff\xe0
  # JPEG with EXIF starts with \xff\xd8\xff\xe1
  # Use \xff\xd8\xff to cover both.
  with ops.name_scope(name, 'is_jpeg'):
    substr = string_ops.substr(contents, 0, 3)
    return math_ops.equal(substr, b'\xff\xd8\xff', name=name)


def _is_png(contents, name=None):
  r"""Convenience function to check if the 'contents' encodes a PNG image.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
    A scalar boolean tensor indicating if 'contents' may be a PNG image.
    is_png is susceptible to false positives.
  """
  with ops.name_scope(name, 'is_png'):
    substr = string_ops.substr(contents, 0, 3)
    return math_ops.equal(substr, b'\211PN', name=name)
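

# Editor's note: a small sketch (not part of the original module) exercising
# the magic-byte check above on freshly encoded images. Assumes TF 2.x eager
# execution; the helper name is ours.
def _example_magic_byte_checks():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  pixels = tf.zeros([4, 4, 3], dtype=tf.uint8)
  jpeg_bytes = tf.io.encode_jpeg(pixels)
  png_bytes = tf.io.encode_png(pixels)
  # JPEG data starts with \xff\xd8\xff; PNG data starts with \x89PN.
  assert bool(tf.io.is_jpeg(jpeg_bytes))
  assert not bool(tf.io.is_jpeg(png_bytes))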


tf_export(
    'io.decode_and_crop_jpeg',
    'image.decode_and_crop_jpeg',
    v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg))

tf_export(
    'io.decode_bmp',
    'image.decode_bmp',
    v1=['io.decode_bmp', 'image.decode_bmp'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_bmp))
tf_export(
    'io.decode_gif',
    'image.decode_gif',
    v1=['io.decode_gif', 'image.decode_gif'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_gif))
tf_export(
    'io.decode_jpeg',
    'image.decode_jpeg',
    v1=['io.decode_jpeg', 'image.decode_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_jpeg))
tf_export(
    'io.decode_png',
    'image.decode_png',
    v1=['io.decode_png', 'image.decode_png'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_png))

tf_export(
    'io.encode_jpeg',
    'image.encode_jpeg',
    v1=['io.encode_jpeg', 'image.encode_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.encode_jpeg))
tf_export(
    'io.extract_jpeg_shape',
    'image.extract_jpeg_shape',
    v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
        dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape))


@tf_export('io.encode_png', 'image.encode_png')
@dispatch.add_dispatch_support
def encode_png(image, compression=-1, name=None):
  r"""PNG-encode an image.

  `image` is a rank-N Tensor of type uint8 or uint16 with shape `batch_dims +
  [height, width, channels]`, where `channels` is:

  *   1: for grayscale.
  *   2: for grayscale + alpha.
  *   3: for RGB.
  *   4: for RGBA.

  The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
  default or a value from 0 to 9. 9 is the highest compression level,
  generating the smallest output, but is slower.

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`.
      Rank N >= 3 with shape `batch_dims + [height, width, channels]`.
    compression: An optional `int`. Defaults to `-1`. Compression level.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  return gen_image_ops.encode_png(
      ops.convert_to_tensor(image), compression, name)
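

# Editor's note: a round-trip sketch (not part of the original module) for
# encode_png, assuming TF 2.x eager execution; the helper name is ours.
def _example_png_roundtrip():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  image = tf.cast(
      tf.random.uniform([8, 8, 3], maxval=256, dtype=tf.int32), tf.uint8)
  encoded = tf.io.encode_png(image, compression=9)  # smallest output, slowest
  decoded = tf.io.decode_png(encoded, channels=3)
  # PNG is lossless, so the round trip is exact.
  tf.debugging.assert_equal(image, decoded)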


@tf_export(
    'io.decode_image',
    'image.decode_image',
    v1=['io.decode_image', 'image.decode_image'])
@dispatch.add_dispatch_support
def decode_image(contents,
                 channels=None,
                 dtype=dtypes.uint8,
                 name=None,
                 expand_animations=True):
  """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`.

  Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
  appropriate operation to convert the input bytes `string` into a `Tensor`
  of type `dtype`.

  Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
  opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
  arrays `[height, width, num_channels]`. Make sure to take this into account
  when constructing your graph if you are intermixing GIF files with BMP,
  JPEG, and/or PNG files. Alternately, set the `expand_animations` argument
  of this function to `False`, in which case the op will return 3-dimensional
  tensors and will truncate animated GIF files to the first frame.

  NOTE: If the first frame of an animated GIF does not occupy the entire
  canvas (maximum frame width x maximum frame height), then it fills the
  unoccupied areas (in the first frame) with zeros (black). For frames after
  the first that do not occupy the entire canvas, it uses the previous frame
  to fill the unoccupied areas.

  Args:
    contents: A `Tensor` of type `string`. 0-D. The encoded image bytes.
    channels: An optional `int`. Defaults to `None` (treated as `0`). Number
      of color channels for the decoded image.
    dtype: The desired DType of the returned `Tensor`.
    name: A name for the operation (optional)
    expand_animations: An optional `bool`. Defaults to `True`. Controls the
      shape of the returned op's output. If `True`, the returned op will
      produce a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for
      all GIFs, whether animated or not. If `False`, the returned op will
      produce a 3-D tensor for all file types and will truncate animated GIFs
      to the first frame.

  Returns:
    `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on
    the file type and the value of the `expand_animations` parameter.

  Raises:
    ValueError: On incorrect number of channels.
  """
  with ops.name_scope(name, 'decode_image'):
    channels = 0 if channels is None else channels
    if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]:
      dest_dtype = dtype
      dtype = dtypes.uint16
      return convert_image_dtype(
          gen_image_ops.decode_image(
              contents=contents,
              channels=channels,
              expand_animations=expand_animations,
              dtype=dtype), dest_dtype)
    else:
      return gen_image_ops.decode_image(
          contents=contents,
          channels=channels,
          expand_animations=expand_animations,
          dtype=dtype)
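

# Editor's note: a sketch (not part of the original module) of the dtype
# handling in decode_image above: a dtype outside {uint8, uint16, float32} is
# decoded as uint16 first and then converted. Assumes TF 2.x eager execution;
# the helper name is ours.
def _example_decode_image_other_dtype():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  contents = tf.io.encode_jpeg(tf.zeros([8, 8, 3], dtype=tf.uint8))
  image = tf.io.decode_image(contents, dtype=tf.float16)
  assert image.dtype == tf.float16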


@tf_export('image.total_variation')
@dispatch.add_dispatch_support
def total_variation(images, name=None):
  """Calculate and return the total variation for one or more images.

  The total variation is the sum of the absolute differences for neighboring
  pixel-values in the input images. This measures how much noise is in the
  images.

  This can be used as a loss-function during optimization so as to suppress
  noise in images. If you have a batch of images, then you should calculate
  the scalar loss-value as the sum:
  `loss = tf.reduce_sum(tf.image.total_variation(images))`

  This implements the anisotropic 2-D version of the formula described here:

  https://en.wikipedia.org/wiki/Total_variation_denoising

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D
      Tensor of shape `[height, width, channels]`.
    name: A name for the operation (optional).

  Raises:
    ValueError: if `images` is not a 3-D or 4-D tensor.

  Returns:
    The total variation of `images`.

    If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with
    the total variation for each image in the batch.
    If `images` was 3-D, return a scalar float with the total variation for
    that image.
  """

  with ops.name_scope(name, 'total_variation'):
    ndims = images.get_shape().ndims

    if ndims == 3:
      # The input is a single image with shape [height, width, channels].

      # Calculate the difference of neighboring pixel-values.
      # The images are shifted one pixel along the height and width by
      # slicing.
      pixel_dif1 = images[1:, :, :] - images[:-1, :, :]
      pixel_dif2 = images[:, 1:, :] - images[:, :-1, :]

      # Sum over all axes. (None is an alias for all axes.)
      sum_axis = None
    elif ndims == 4:
      # The input is a batch of images with shape:
      # [batch, height, width, channels].

      # Calculate the difference of neighboring pixel-values.
      # The images are shifted one pixel along the height and width by
      # slicing.
      pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :]
      pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :]

      # Only sum over the last 3 axes.
      # This results in a 1-D tensor with the total variation for each image.
      sum_axis = [1, 2, 3]
    else:
      raise ValueError('\'images\' must be either 3 or 4-dimensional.')

    # Calculate the total variation by taking the absolute value of the
    # pixel-differences and summing over the appropriate axis.
    tot_var = (
        math_ops.reduce_sum(math_ops.abs(pixel_dif1), axis=sum_axis) +
        math_ops.reduce_sum(math_ops.abs(pixel_dif2), axis=sum_axis))

  return tot_var
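

# Editor's note: a sketch (not part of the original module) checking the
# anisotropic total-variation formula by hand and showing the scalar-loss
# reduction recommended above. Assumes TF 2.x eager execution; names are ours.
def _example_total_variation_by_hand():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  images = tf.random.uniform([2, 5, 5, 3])
  tv = tf.image.total_variation(images)
  dh = tf.reduce_sum(
      tf.abs(images[:, 1:, :, :] - images[:, :-1, :, :]), axis=[1, 2, 3])
  dw = tf.reduce_sum(
      tf.abs(images[:, :, 1:, :] - images[:, :, :-1, :]), axis=[1, 2, 3])
  tf.debugging.assert_near(tv, dh + dw)
  loss = tf.reduce_sum(tv)  # scalar loss over the batch
  del loss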


@tf_export('image.sample_distorted_bounding_box', v1=[])
@dispatch.add_dispatch_support
def sample_distorted_bounding_box_v2(image_size,
                                     bounding_boxes,
                                     seed=0,
                                     min_object_covered=0.1,
                                     aspect_ratio_range=None,
                                     area_range=None,
                                     max_attempts=None,
                                     use_image_if_no_bounding_boxes=None,
                                     name=None):
  """Generate a single randomly distorted bounding box for an image.

  Bounding box annotations are often supplied in addition to ground-truth
  labels in image recognition or object localization tasks. A common
  technique for training such a system is to randomly distort an image while
  preserving its content, i.e. *data augmentation*. This Op outputs a
  randomly distorted localization of an object, i.e. bounding box, given an
  `image_size`, `bounding_boxes` and a series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop
  the image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the
  width and the height of the underlying image.

  For example,

  ```python
  # Generate a single distorted bounding box.
  begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
      tf.shape(image),
      bounding_boxes=bounding_boxes,
      min_object_covered=0.1)

  # Draw the bounding box in an image summary.
  image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                bbox_for_draw)
  tf.compat.v1.summary.image('images_with_box', image_with_box)

  # Employ the bounding box to distort the image.
  distorted_image = tf.slice(image, begin, size)
  ```

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = True` will assume there is a single
  implicit bounding box covering the whole image. If
  `use_image_if_no_bounding_boxes` is `False` and no bounding boxes are
  supplied, an error is raised.

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed`
  param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops
  guarantee the same results given the same seed independent of how many
  times the function is called, and independent of global seed settings
  (e.g. `tf.random.set_seed`).

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`,
      `int8`, `int16`, `int32`, `int64`. 1-D, containing `[height, width,
      channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N,
      4]` describing the N bounding boxes associated with the image.
    seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero,
      the random number generator is seeded by the given `seed`. Otherwise,
      it is seeded by a random seed.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be
      non-negative. In the case of 0, the cropped area does not need to
      overlap any of the bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio =
      width / height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an
      error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[offset_height, offset_width, 0]`. Provide as input to `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[target_height, target_width, -1]`. Provide as input to `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]`
      containing the distorted bounding box. Provide as input to
      `tf.image.draw_bounding_boxes`.

  Raises:
    ValueError: If no seed is specified and op determinism is enabled.
  """
  if seed:
    seed1, seed2 = random_seed.get_seed(seed)
  else:
    if config.is_op_determinism_enabled():
      raise ValueError(
          f'tf.image.sample_distorted_bounding_box requires a non-zero seed '
          f'to be passed in when determinism is enabled, but got seed={seed}. '
          f'Please pass in a non-zero seed, e.g. by passing "seed=1".')
    seed1, seed2 = (0, 0)
  with ops.name_scope(name, 'sample_distorted_bounding_box'):
    return gen_image_ops.sample_distorted_bounding_box_v2(
        image_size,
        bounding_boxes,
        seed=seed1,
        seed2=seed2,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)


@tf_export('image.stateless_sample_distorted_bounding_box', v1=[])
@dispatch.add_dispatch_support
def stateless_sample_distorted_bounding_box(image_size,
                                            bounding_boxes,
                                            seed,
                                            min_object_covered=0.1,
                                            aspect_ratio_range=None,
                                            area_range=None,
                                            max_attempts=None,
                                            use_image_if_no_bounding_boxes=None,
                                            name=None):
  """Generate a randomly distorted bounding box for an image deterministically.

  Bounding box annotations are often supplied in addition to ground-truth
  labels in image recognition or object localization tasks. A common
  technique for training such a system is to randomly distort an image while
  preserving its content, i.e. *data augmentation*. This Op, given the same
  `seed`, deterministically outputs a randomly distorted localization of an
  object, i.e. bounding box, given an `image_size`, `bounding_boxes` and a
  series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop
  the image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the
  width and the height of the underlying image.

  The output of this Op is guaranteed to be the same given the same `seed`
  and is independent of how many times the function is called, and
  independent of global seed settings (e.g. `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]])
  >>> bbox = tf.constant(
  ...     [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
  >>> seed = (1, 2)
  >>> # Generate a single distorted bounding box.
  >>> bbox_begin, bbox_size, bbox_draw = (
  ...     tf.image.stateless_sample_distorted_bounding_box(
  ...         tf.shape(image), bounding_boxes=bbox, seed=seed))
  >>> # Employ the bounding box to distort the image.
  >>> tf.slice(image, bbox_begin, bbox_size)
  <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy=
  array([[[1],
          [2]],
         [[4],
          [5]]])>
  >>> # Draw the bounding box in an image summary.
  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
  >>> tf.image.draw_bounding_boxes(
  ...     tf.expand_dims(tf.cast(image, tf.float32), 0), bbox_draw, colors)
  <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
  array([[[[1.],
           [1.],
           [3.]],
          [[1.],
           [1.],
           [6.]],
          [[7.],
           [8.],
           [9.]]]], dtype=float32)>

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = True` will assume there is a single
  implicit bounding box covering the whole image. If
  `use_image_if_no_bounding_boxes` is `False` and no bounding boxes are
  supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`,
      `int8`, `int16`, `int32`, `int64`. 1-D, containing `[height, width,
      channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N,
      4]` describing the N bounding boxes associated with the image.
    seed: A shape [2] Tensor, the seed to the random number generator. Must
      have dtype `int32` or `int64`. (When using XLA, only `int32` is
      allowed.)
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be
      non-negative. In the case of 0, the cropped area does not need to
      overlap any of the bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio =
      width / height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an
      error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[offset_height, offset_width, 0]`. Provide as input to `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[target_height, target_width, -1]`. Provide as input to `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]`
      containing the distorted bounding box. Provide as input to
      `tf.image.draw_bounding_boxes`.
  """
  with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'):
    return gen_image_ops.stateless_sample_distorted_bounding_box(
        image_size=image_size,
        bounding_boxes=bounding_boxes,
        seed=seed,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)


@tf_export(v1=['image.sample_distorted_bounding_box'])
@dispatch.add_dispatch_support
@deprecation.deprecated(
    date=None,
    instructions='`seed2` arg is deprecated. '
    'Use sample_distorted_bounding_box_v2 instead.')
def sample_distorted_bounding_box(image_size,
                                  bounding_boxes,
                                  seed=None,
                                  seed2=None,
                                  min_object_covered=0.1,
                                  aspect_ratio_range=None,
                                  area_range=None,
                                  max_attempts=None,
                                  use_image_if_no_bounding_boxes=None,
                                  name=None):
  """Generate a single randomly distorted bounding box for an image.

  Bounding box annotations are often supplied in addition to ground-truth
  labels in image recognition or object localization tasks. A common
  technique for training such a system is to randomly distort an image while
  preserving its content, i.e. *data augmentation*. This Op outputs a
  randomly distorted localization of an object, i.e. bounding box, given an
  `image_size`, `bounding_boxes` and a series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop
  the image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the
  width and height of the underlying image.

  For example,

  ```python
  # Generate a single distorted bounding box.
  begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
      tf.shape(image),
      bounding_boxes=bounding_boxes,
      min_object_covered=0.1)

  # Draw the bounding box in an image summary.
  image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                bbox_for_draw)
  tf.compat.v1.summary.image('images_with_box', image_with_box)

  # Employ the bounding box to distort the image.
  distorted_image = tf.slice(image, begin, size)
  ```

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = True` will assume there is a single
  implicit bounding box covering the whole image. If
  `use_image_if_no_bounding_boxes` is `False` and no bounding boxes are
  supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`,
      `int8`, `int16`, `int32`, `int64`. 1-D, containing `[height, width,
      channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N,
      4]` describing the N bounding boxes associated with the image.
    seed: An optional `int`. Defaults to `None` (treated as `0`). If either
      `seed` or `seed2` is set to non-zero, the random number generator is
      seeded by the given `seed`. Otherwise, it is seeded by a random seed.
    seed2: An optional `int`. Defaults to `None` (treated as `0`). A second
      seed to avoid seed collision.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be
      non-negative. In the case of 0, the cropped area does not need to
      overlap any of the bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio =
      width / height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an
      error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[offset_height, offset_width, 0]`. Provide as input to `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[target_height, target_width, -1]`. Provide as input to `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]`
      containing the distorted bounding box. Provide as input to
      `tf.image.draw_bounding_boxes`.

  Raises:
    ValueError: If no seed is specified and op determinism is enabled.
  """
  if not seed and not seed2 and config.is_op_determinism_enabled():
    raise ValueError(
        f'tf.compat.v1.image.sample_distorted_bounding_box requires "seed" or '
        f'"seed2" to be non-zero when determinism is enabled. Please pass in '
        f'a non-zero seed, e.g. by passing "seed=1". Got seed={seed} and '
        f'seed2={seed2}')
  with ops.name_scope(name, 'sample_distorted_bounding_box'):
    return gen_image_ops.sample_distorted_bounding_box_v2(
        image_size,
        bounding_boxes,
        seed=seed,
        seed2=seed2,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)


@tf_export('image.non_max_suppression')
@dispatch.add_dispatch_support
def non_max_suppression(boxes,
                        scores,
                        max_output_size,
                        iou_threshold=0.5,
                        score_threshold=float('-inf'),
                        name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes. Bounding boxes are supplied as
  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of
  any diagonal pair of box corners, and the coordinates can be provided as
  normalized (i.e., lying in the interval `[0, 1]`) or absolute. Note that
  this algorithm is agnostic to where the origin is in the coordinate system,
  and is invariant to orthogonal transformations and translations of the
  coordinate system; thus translating or reflecting the coordinate system
  results in the same boxes being selected by the algorithm.

  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes. The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation. For example:

  ```python
  selected_indices = tf.image.non_max_suppression(
      boxes, scores, max_output_size, iou_threshold)
  selected_boxes = tf.gather(boxes, selected_indices)
  ```

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    iou_threshold: A 0-D float tensor representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    score_threshold: A 0-D float tensor representing the threshold for
      deciding when to remove boxes based on score.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
  """
  with ops.name_scope(name, 'non_max_suppression'):
    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
    score_threshold = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size,
                                                iou_threshold, score_threshold)
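

# Editor's note: a concrete sketch (not part of the original module) of hard
# NMS with a near-duplicate box, assuming TF 2.x eager execution; the boxes
# and the helper name are ours.
def _example_non_max_suppression():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],    # box 0
                       [0.0, 0.05, 1.0, 1.05],  # near-duplicate of box 0
                       [0.0, 2.0, 1.0, 3.0]])   # disjoint from box 0
  scores = tf.constant([0.9, 0.8, 0.7])
  selected = tf.image.non_max_suppression(
      boxes, scores, max_output_size=3, iou_threshold=0.5)
  # Box 1 overlaps box 0 with IOU ~0.9 > 0.5 and is suppressed.
  assert selected.numpy().tolist() == [0, 2]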


@tf_export('image.non_max_suppression_with_scores')
@dispatch.add_dispatch_support
def non_max_suppression_with_scores(boxes,
                                    scores,
                                    max_output_size,
                                    iou_threshold=0.5,
                                    score_threshold=float('-inf'),
                                    soft_nms_sigma=0.0,
                                    name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes. Bounding boxes are supplied as
  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of
  any diagonal pair of box corners, and the coordinates can be provided as
  normalized (i.e., lying in the interval `[0, 1]`) or absolute. Note that
  this algorithm is agnostic to where the origin is in the coordinate system,
  and is invariant to orthogonal transformations and translations of the
  coordinate system; thus translating or reflecting the coordinate system
  results in the same boxes being selected by the algorithm.

  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes. The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation. For example:

  ```python
  selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(
      boxes, scores, max_output_size, iou_threshold=1.0, score_threshold=0.1,
      soft_nms_sigma=0.5)
  selected_boxes = tf.gather(boxes, selected_indices)
  ```

  This function generalizes the `tf.image.non_max_suppression` op by also
  supporting a Soft-NMS (with Gaussian weighting) mode (c.f.
  Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
  of other overlapping boxes instead of directly causing them to be pruned.
  Consequently, in contrast to `tf.image.non_max_suppression`,
  `tf.image.non_max_suppression_with_scores` returns the new scores of each
  input box in the second output, `selected_scores`.

  To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
  larger than 0. When `soft_nms_sigma` equals 0, the behavior of
  `tf.image.non_max_suppression_with_scores` is identical to that of
  `tf.image.non_max_suppression` (except for the extra output) both in
  function and in running time.

  Note that when `soft_nms_sigma` > 0, Soft-NMS is performed and
  `iou_threshold` is ignored. `iou_threshold` is only used for standard NMS.

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    iou_threshold: A 0-D float tensor representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    score_threshold: A 0-D float tensor representing the threshold for
      deciding when to remove boxes based on score.
    soft_nms_sigma: A 0-D float tensor representing the sigma parameter for
      Soft NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503). When
      `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard)
      NMS.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
    selected_scores: A 1-D float tensor of shape `[M]` representing the
      corresponding scores for each selected box, where
      `M <= max_output_size`. Scores only differ from the corresponding input
      scores when using Soft NMS (i.e. when `soft_nms_sigma > 0`).
  """
  with ops.name_scope(name, 'non_max_suppression_with_scores'):
    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
    score_threshold = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    soft_nms_sigma = ops.convert_to_tensor(
        soft_nms_sigma, name='soft_nms_sigma')
    (selected_indices, selected_scores,
     _) = gen_image_ops.non_max_suppression_v5(
         boxes,
         scores,
         max_output_size,
         iou_threshold,
         score_threshold,
         soft_nms_sigma,
         pad_to_max_output_size=False)
    return selected_indices, selected_scores
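

# Editor's note: a sketch (not part of the original module) contrasting
# Soft-NMS with hard NMS on the same overlapping pair. Assumes TF 2.x eager
# execution; the boxes and the helper name are ours.
def _example_soft_nms_decays_scores():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                       [0.0, 0.05, 1.0, 1.05]])  # IOU ~0.9 with box 0
  scores = tf.constant([0.9, 0.8])
  indices, new_scores = tf.image.non_max_suppression_with_scores(
      boxes, scores, max_output_size=2, soft_nms_sigma=0.5)
  # With soft_nms_sigma > 0 the overlapping box is kept, but its score is
  # decayed by a Gaussian penalty rather than being pruned outright.
  assert indices.shape[0] == 2
  assert float(new_scores[1]) < 0.8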


@tf_export('image.non_max_suppression_overlaps')
@dispatch.add_dispatch_support
def non_max_suppression_with_overlaps(overlaps,
                                      scores,
                                      max_output_size,
                                      overlap_threshold=0.5,
                                      score_threshold=float('-inf'),
                                      name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high overlap with previously selected boxes.
  The n-by-n pairwise overlap values are supplied as a square matrix.

  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes. The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation. For example:

  ```python
  selected_indices = tf.image.non_max_suppression_overlaps(
      overlaps, scores, max_output_size, overlap_threshold)
  selected_boxes = tf.gather(boxes, selected_indices)
  ```

  Args:
    overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]`
      representing the n-by-n box overlap values.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    overlap_threshold: A 0-D float tensor representing the threshold for
      deciding whether boxes overlap too much with respect to the provided
      overlap values.
    score_threshold: A 0-D float tensor representing the threshold for
      deciding when to remove boxes based on score.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the overlaps tensor, where `M <= max_output_size`.
  """
  with ops.name_scope(name, 'non_max_suppression_overlaps'):
    overlap_threshold = ops.convert_to_tensor(
        overlap_threshold, name='overlap_threshold')
    # pylint: disable=protected-access
    return gen_image_ops.non_max_suppression_with_overlaps(
        overlaps, scores, max_output_size, overlap_threshold, score_threshold)
    # pylint: enable=protected-access


_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115],
                      [0.587, -0.27455667, -0.52273617],
                      [0.114, -0.32134392, 0.31119955]]


@tf_export('image.rgb_to_yiq')
@dispatch.add_dispatch_support
def rgb_to_yiq(images):
  """Converts one or more images from RGB to YIQ.

  Outputs a tensor of the same shape as the `images` tensor, containing the
  YIQ value of the pixels.
  The output is only well defined if the values in `images` are in [0, 1].

  Usage Example:

  >>> x = tf.constant([[[1.0, 2.0, 3.0]]])
  >>> tf.image.rgb_to_yiq(x)
  <tf.Tensor: shape=(1, 1, 3), dtype=float32,
  numpy=array([[[ 1.815     , -0.91724455,  0.09962624]]], dtype=float32)>

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  kernel = ops.convert_to_tensor(
      _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel')
  ndims = images.get_shape().ndims
  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])


_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021],
                      [0.6208248, -0.64720424, 1.70423049]]


@tf_export('image.yiq_to_rgb')
@dispatch.add_dispatch_support
def yiq_to_rgb(images):
  """Converts one or more images from YIQ to RGB.

  Outputs a tensor of the same shape as the `images` tensor, containing the
  RGB value of the pixels.
  The output is only well defined if the Y values in `images` are in [0, 1],
  the I values are in [-0.5957, 0.5957], and the Q values are in
  [-0.5226, 0.5226].

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  kernel = ops.convert_to_tensor(
      _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel')
  ndims = images.get_shape().ndims
  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
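

# Editor's note: a round-trip sketch (not part of the original module): the
# two YIQ kernels above are approximate inverses of each other. Assumes TF
# 2.x eager execution; the helper name and tolerance are ours.
def _example_yiq_roundtrip():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  rgb = tf.random.uniform([4, 4, 3])
  recovered = tf.image.yiq_to_rgb(tf.image.rgb_to_yiq(rgb))
  tf.debugging.assert_near(rgb, recovered, atol=1e-3)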


_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538],
                      [0.587, -0.28886916, -0.51496512],
                      [0.114, 0.43601035, -0.10001026]]


@tf_export('image.rgb_to_yuv')
@dispatch.add_dispatch_support
def rgb_to_yuv(images):
  """Converts one or more images from RGB to YUV.

  Outputs a tensor of the same shape as the `images` tensor, containing the
  YUV value of the pixels.
  The output is only well defined if the values in `images` are in [0, 1].
  There are two common ways of representing an image: with pixel values in
  the [0, 255] range, or as floats in the [0, 1] range. Users need to convert
  the input image into the float [0, 1] range before calling this function.

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  kernel = ops.convert_to_tensor(
      _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel')
  ndims = images.get_shape().ndims
  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])


_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185],
                      [1.13988303, -0.58062185, 0]]


@tf_export('image.yuv_to_rgb')
@dispatch.add_dispatch_support
def yuv_to_rgb(images):
  """Converts one or more images from YUV to RGB.

  Outputs a tensor of the same shape as the `images` tensor, containing the
  RGB value of the pixels.
  The output is only well defined if the Y values in `images` are in [0, 1],
  and the U and V values are in [-0.5, 0.5].

  As per the above description, you need to scale your YUV images if their
  pixel values are not in the required range. The example below illustrates
  preprocessing of each channel of images before feeding them to
  `yuv_to_rgb`.

  ```python
  yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255)
  last_dimension_axis = len(yuv_images.shape) - 1
  yuv_tensor_images = tf.truediv(
      tf.subtract(
          yuv_images,
          tf.reduce_min(yuv_images)
      ),
      tf.subtract(
          tf.reduce_max(yuv_images),
          tf.reduce_min(yuv_images)
      )
  )
  y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis)
  target_uv_min, target_uv_max = -0.5, 0.5
  u = u * (target_uv_max - target_uv_min) + target_uv_min
  v = v * (target_uv_max - target_uv_min) + target_uv_min
  preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis)
  rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images)
  ```

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  kernel = ops.convert_to_tensor(
      _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel')
  ndims = images.get_shape().ndims
  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
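

# Editor's note: the matching round-trip sketch (not part of the original
# module) for the YUV kernels above; rgb_to_yuv output already lies in the
# ranges yuv_to_rgb expects. Assumes TF 2.x eager execution; the helper name
# and tolerance are ours.
def _example_yuv_roundtrip():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  rgb = tf.random.uniform([4, 4, 3])
  yuv = tf.image.rgb_to_yuv(rgb)  # Y in [0, 1]; U, V roughly in [-0.5, 0.5]
  tf.debugging.assert_near(rgb, tf.image.yuv_to_rgb(yuv), atol=1e-3)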


def _verify_compatible_image_shapes(img1, img2):
  """Checks if two image tensors are compatible for applying SSIM or PSNR.

  This function checks if two sets of images have ranks at least 3, and if
  the last three dimensions match.

  Args:
    img1: Tensor containing the first image batch.
    img2: Tensor containing the second image batch.

  Returns:
    A tuple containing: the first tensor shape, the second tensor shape, and
    a list of control_flow_assert.Assert() ops implementing the checks.

  Raises:
    ValueError: When static shape check fails.
  """
  shape1 = img1.get_shape().with_rank_at_least(3)
  shape2 = img2.get_shape().with_rank_at_least(3)
  shape1[-3:].assert_is_compatible_with(shape2[-3:])

  if shape1.ndims is not None and shape2.ndims is not None:
    for dim1, dim2 in zip(
        reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])):
      if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
        raise ValueError('Two images are not compatible: %s and %s' %
                         (shape1, shape2))

  # Now assign shape tensors.
  shape1, shape2 = array_ops.shape_n([img1, img2])

  # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
  checks = []
  checks.append(
      control_flow_assert.Assert(
          math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2],
          summarize=10))
  checks.append(
      control_flow_assert.Assert(
          math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
          [shape1, shape2],
          summarize=10))
  return shape1, shape2, checks


@tf_export('image.psnr')
@dispatch.add_dispatch_support
def psnr(a, b, max_val, name=None):
  """Returns the Peak Signal-to-Noise Ratio between a and b.

  This is intended to be used on signals (or images). Produces a PSNR value
  for each image in batch.

  The last three dimensions of input are expected to be [height, width,
  depth].

  Example:

  ```python
  # Read images from file.
  im1 = tf.image.decode_png(tf.io.read_file('path/to/im1.png'))
  im2 = tf.image.decode_png(tf.io.read_file('path/to/im2.png'))
  # Compute PSNR over tf.uint8 Tensors.
  psnr1 = tf.image.psnr(im1, im2, max_val=255)

  # Compute PSNR over tf.float32 Tensors.
  im1 = tf.image.convert_image_dtype(im1, tf.float32)
  im2 = tf.image.convert_image_dtype(im2, tf.float32)
  psnr2 = tf.image.psnr(im1, im2, max_val=1.0)
  # psnr1 and psnr2 both have type tf.float32 and are almost equal.
  ```

  Args:
    a: First set of images.
    b: Second set of images.
    max_val: The dynamic range of the images (i.e., the difference between
      the maximum and the minimum allowed values).
    name: Namespace to embed the computation in.

  Returns:
    The PSNR between `a` and `b`. The returned tensor has type `tf.float32`
    and shape [batch_size, 1].
  """
  with ops.name_scope(name, 'PSNR', [a, b]):
    # Need to convert the images to float32. Scale max_val accordingly so
    # that PSNR is computed correctly.
    max_val = math_ops.cast(max_val, a.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    a = convert_image_dtype(a, dtypes.float32)
    b = convert_image_dtype(b, dtypes.float32)
    mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1])
    psnr_val = math_ops.subtract(
        20 * math_ops.log(max_val) / math_ops.log(10.0),
        np.float32(10 / np.log(10)) * math_ops.log(mse),
        name='psnr')

    _, _, checks = _verify_compatible_image_shapes(a, b)
    with ops.control_dependencies(checks):
      return array_ops.identity(psnr_val)
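

# Editor's note: a sketch (not part of the original module) verifying the
# PSNR formula used above, PSNR = 20*log10(max_val) - 10*log10(MSE). Assumes
# TF 2.x eager execution; the helper name is ours.
def _example_psnr_by_hand():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  a = tf.random.uniform([1, 16, 16, 3])
  b = tf.random.uniform([1, 16, 16, 3])
  mse = tf.reduce_mean(tf.math.squared_difference(a, b), axis=[-3, -2, -1])
  by_hand = -10.0 * tf.math.log(mse) / tf.math.log(10.0)  # max_val = 1.0
  tf.debugging.assert_near(tf.image.psnr(a, b, max_val=1.0), by_hand,
                           atol=1e-4)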


def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03):
  r"""Helper function for computing SSIM.

  SSIM estimates covariances with weighted sums. The default parameters
  use a biased estimate of the covariance:
  Suppose `reducer` is a weighted sum, then the mean estimators are
    \mu_x = \sum_i w_i x_i,
    \mu_y = \sum_i w_i y_i,
  where the w_i's are the weighted-sum weights, and the covariance estimator
  is
    cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
  with the assumption \sum_i w_i = 1. This covariance estimator is biased,
  since
    E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y).
  For an SSIM measure with unbiased covariance estimators, pass
  (1 - \sum_i w_i ^ 2) as the `compensation` argument.

  Args:
    x: First set of images.
    y: Second set of images.
    reducer: Function that computes 'local' averages from the set of images.
      For the non-convolutional version, this is usually
      tf.reduce_mean(x, [1, 2]), and for the convolutional version, this is
      usually tf.nn.avg_pool2d or tf.nn.conv2d with a weighted-sum kernel.
    max_val: The dynamic range (i.e., the difference between the maximum
      possible allowed value and the minimum allowed value).
    compensation: Compensation factor. See above.
    k1: Default value 0.01.
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).

  Returns:
    A pair containing the luminance measure, and the contrast-structure
    measure.
  """

  c1 = (k1 * max_val)**2
  c2 = (k2 * max_val)**2

  # SSIM luminance measure is
  # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1).
  mean0 = reducer(x)
  mean1 = reducer(y)
  num0 = mean0 * mean1 * 2.0
  den0 = math_ops.square(mean0) + math_ops.square(mean1)
  luminance = (num0 + c1) / (den0 + c1)

  # SSIM contrast-structure measure is
  # (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2).
  # Note that `reducer` is a weighted sum with weights w_i, \sum_i w_i = 1,
  # so
  #   cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
  #            = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j).
  num1 = reducer(x * y) * 2.0
  den1 = reducer(math_ops.square(x) + math_ops.square(y))
  c2 *= compensation
  cs = (num1 - num0 + c2) / (den1 - den0 + c2)

  # SSIM score is the product of the luminance and contrast-structure
  # measures.
  return luminance, cs


def _fspecial_gauss(size, sigma):
  """Function to mimic the 'fspecial' gaussian MATLAB function."""
  size = ops.convert_to_tensor(size, dtypes.int32)
  sigma = ops.convert_to_tensor(sigma)

  coords = math_ops.cast(math_ops.range(size), sigma.dtype)
  coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0

  g = math_ops.square(coords)
  g *= -0.5 / math_ops.square(sigma)

  g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1])
  g = array_ops.reshape(g, shape=[1, -1])  # For tf.nn.softmax().
  g = nn_ops.softmax(g)
  return array_ops.reshape(g, shape=[size, size, 1, 1])
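

# Editor's note: a sketch (not part of the original module) of the softmax
# trick used in _fspecial_gauss above: softmax over g_i + g_j yields an
# L1-normalized outer product of two 1-D Gaussians, since
# exp(g_i + g_j) = exp(g_i) * exp(g_j). Assumes TF 2.x eager execution; names
# here are ours.
def _example_fspecial_gauss_normalization():
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  size, sigma = 11, 1.5
  coords = tf.cast(tf.range(size), tf.float32) - (size - 1) / 2.0
  g = -0.5 * tf.square(coords) / sigma**2
  kernel = tf.nn.softmax(tf.reshape(g[:, None] + g[None, :], [1, -1]))
  # The kernel sums to one, exactly like an explicitly normalized Gaussian.
  tf.debugging.assert_near(tf.reduce_sum(kernel), 1.0)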


def _ssim_per_channel(img1,
                      img2,
                      max_val=1.0,
                      filter_size=11,
                      filter_sigma=1.5,
                      k1=0.01,
                      k2=0.03,
                      return_index_map=False):
  """Computes SSIM index between img1 and img2 per color channel.

  This function matches the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Details:
  - An 11x11 Gaussian filter of width 1.5 is used.
  - k1 = 0.01, k2 = 0.03 as in the original paper.

  Args:
    img1: First image batch.
    img2: Second image batch.
    max_val: The dynamic range of the images (i.e., the difference between
      the maximum and the minimum allowed values).
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01.
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).
    return_index_map: If True returns local SSIM map instead of the global
      mean.

  Returns:
    A pair of tensors containing the channel-wise SSIM and contrast-structure
    values. The shape is [..., channels].
  """
  filter_size = constant_op.constant(filter_size, dtype=dtypes.int32)
  filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype)

  shape1, shape2 = array_ops.shape_n([img1, img2])
  checks = [
      control_flow_assert.Assert(
          math_ops.reduce_all(
              math_ops.greater_equal(shape1[-3:-1], filter_size)),
          [shape1, filter_size],
          summarize=8),
      control_flow_assert.Assert(
          math_ops.reduce_all(
              math_ops.greater_equal(shape2[-3:-1], filter_size)),
          [shape2, filter_size],
          summarize=8)
  ]

  # Enforce the check to run before computation.
  with ops.control_dependencies(checks):
    img1 = array_ops.identity(img1)

  # TODO(sjhwang): Try to cache kernels and compensation factor.
  kernel = _fspecial_gauss(filter_size, filter_sigma)
  kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1])

  # The correct compensation factor is
  # `1.0 - tf.reduce_sum(tf.square(kernel))`, but to match the MATLAB
  # implementation of MS-SSIM, we use 1.0 instead.
  compensation = 1.0

  # TODO(sjhwang): Try FFT.
  # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying
  #   1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
  def reducer(x):
    shape = array_ops.shape(x)
    x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
    y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
    return array_ops.reshape(
        y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))

  luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1,
                               k2)

  # Average over the second and the third from the last: height, width.
  if return_index_map:
    ssim_val = luminance * cs
  else:
    axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
    ssim_val = math_ops.reduce_mean(luminance * cs, axes)
    cs = math_ops.reduce_mean(cs, axes)
  return ssim_val, cs


@tf_export('image.ssim')
@dispatch.add_dispatch_support
def ssim(img1,
         img2,
         max_val,
         filter_size=11,
         filter_sigma=1.5,
         k1=0.01,
         k2=0.03,
         return_index_map=False):
  """Computes SSIM index between img1 and img2.

  This function is based on the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Note: The true SSIM is only defined on grayscale. This function does not
  perform any colorspace transform. (If the input is already YUV, then it
  will compute YUV SSIM average.)

  Details:
  - An 11x11 Gaussian filter of width 1.5 is used.
  - k1 = 0.01, k2 = 0.03 as in the original paper.

  The image sizes must be at least 11x11 because of the filter size.

  Example:

  ```python
  # Read images (of size 255 x 255) from file.
  im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png'))
  im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png'))
  tf.shape(im1)  # `im1.png` has 3 channels; shape is `(255, 255, 3)`
  tf.shape(im2)  # `im2.png` has 3 channels; shape is `(255, 255, 3)`
  # Add an outer batch dimension to each image.
  im1 = tf.expand_dims(im1, axis=0)
  im2 = tf.expand_dims(im2, axis=0)
  # Compute SSIM over tf.uint8 Tensors.
  ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11,
                        filter_sigma=1.5, k1=0.01, k2=0.03)

  # Compute SSIM over tf.float32 Tensors.
  im1 = tf.image.convert_image_dtype(im1, tf.float32)
  im2 = tf.image.convert_image_dtype(im2, tf.float32)
  ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11,
                        filter_sigma=1.5, k1=0.01, k2=0.03)
  # ssim1 and ssim2 both have type tf.float32 and are almost equal.
  ```

  Args:
    img1: First image batch. 4-D Tensor of shape `[batch, height, width,
      channels]` with only positive pixel values.
    img2: Second image batch. 4-D Tensor of shape `[batch, height, width,
      channels]` with only positive pixel values.
    max_val: The dynamic range of the images (i.e., the difference between
      the maximum and the minimum allowed values).
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01.
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).
    return_index_map: If True returns local SSIM map instead of the global
      mean.

  Returns:
    A tensor containing an SSIM value for each image in batch, or a tensor
    containing an SSIM value for each pixel for each image in batch if
    return_index_map is True. Returned SSIM values are in range (-1, 1] when
    pixel values are non-negative. Returns a tensor with shape:
    broadcast(img1.shape[:-3], img2.shape[:-3]) or broadcast(img1.shape[:-1],
    img2.shape[:-1]).
  """
  with ops.name_scope(None, 'SSIM', [img1, img2]):
    # Convert to tensor if needed.
    img1 = ops.convert_to_tensor(img1, name='img1')
    img2 = ops.convert_to_tensor(img2, name='img2')
    # Shape checking.
    _, _, checks = _verify_compatible_image_shapes(img1, img2)
    with ops.control_dependencies(checks):
      img1 = array_ops.identity(img1)

    # Need to convert the images to float32. Scale max_val accordingly so
    # that SSIM is computed correctly.
    max_val = math_ops.cast(max_val, img1.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    img1 = convert_image_dtype(img1, dtypes.float32)
    img2 = convert_image_dtype(img2, dtypes.float32)
    ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val, filter_size,
                                            filter_sigma, k1, k2,
                                            return_index_map)
    # Compute average over color channels.
    return math_ops.reduce_mean(ssim_per_channel, [-1])
4462# Default values obtained by Wang et al.
4463_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
4466@tf_export('image.ssim_multiscale')
4467@dispatch.add_dispatch_support
4468def ssim_multiscale(img1,
4469 img2,
4470 max_val,
4471 power_factors=_MSSSIM_WEIGHTS,
4472 filter_size=11,
4473 filter_sigma=1.5,
4474 k1=0.01,
4475 k2=0.03):
4476 """Computes the MS-SSIM between img1 and img2.
4478 This function assumes that `img1` and `img2` are image batches, i.e. the last
4479 three dimensions are [height, width, channels].
4481 Note: The true SSIM is only defined on grayscale. This function does not
4482 perform any colorspace transform. (If the input is already YUV, then it will
4483 compute YUV SSIM average.)
4485 Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
4486 structural similarity for image quality assessment." Signals, Systems and
4487 Computers, 2004.
4489 Args:
4490 img1: First image batch with only Positive Pixel Values.
4491 img2: Second image batch with only Positive Pixel Values. Must have the
4492 same rank as img1.
4493 max_val: The dynamic range of the images (i.e., the difference between the
4494 maximum the and minimum allowed values).
4495 power_factors: Iterable of weights for each of the scales. The number of
4496 scales used is the length of the list. Index 0 is the unscaled
4497 resolution's weight and each increasing scale corresponds to the image
4498 being downsampled by 2. Defaults to (0.0448, 0.2856, 0.3001, 0.2363,
4499 0.1333), which are the values obtained in the original paper.
4500    filter_size: Default value 11 (size of the Gaussian filter).
4501    filter_sigma: Default value 1.5 (width of the Gaussian filter).
4502    k1: Default value 0.01.
4503    k2: Default value 0.03 (SSIM is less sensitive to k2 for lower values, so
4504      it is preferable to take values in the range 0 < k2 < 0.4).
4506 Returns:
4507 A tensor containing an MS-SSIM value for each image in batch. The values
4508 are in range [0, 1]. Returns a tensor with shape:
4509 broadcast(img1.shape[:-3], img2.shape[:-3]).
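
  A minimal sketch with random image batches (sizes are illustrative and
  chosen so that the smallest of the five default scales, 256 / 2**4 = 16,
  still fits the default 11x11 filter):

  ```python
  im1 = tf.random.uniform([2, 256, 256, 3], maxval=1.0)
  im2 = tf.random.uniform([2, 256, 256, 3], maxval=1.0)
  msssim = tf.image.ssim_multiscale(im1, im2, max_val=1.0)  # shape [2]
  ```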
4510 """
4511 with ops.name_scope(None, 'MS-SSIM', [img1, img2]):
4512 # Convert to tensor if needed.
4513 img1 = ops.convert_to_tensor(img1, name='img1')
4514 img2 = ops.convert_to_tensor(img2, name='img2')
4515 # Shape checking.
4516 shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2)
4517 with ops.control_dependencies(checks):
4518 img1 = array_ops.identity(img1)
4520 # Need to convert the images to float32. Scale max_val accordingly so that
4521 # SSIM is computed correctly.
4522 max_val = math_ops.cast(max_val, img1.dtype)
4523 max_val = convert_image_dtype(max_val, dtypes.float32)
4524 img1 = convert_image_dtype(img1, dtypes.float32)
4525 img2 = convert_image_dtype(img2, dtypes.float32)
4527 imgs = [img1, img2]
4528 shapes = [shape1, shape2]
4530 # img1 and img2 are assumed to be a (multi-dimensional) batch of
4531 # 3-dimensional images (height, width, channels). `heads` contain the batch
4532 # dimensions, and `tails` contain the image dimensions.
4533 heads = [s[:-3] for s in shapes]
4534 tails = [s[-3:] for s in shapes]
4536 divisor = [1, 2, 2, 1]
4537 divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32)
4539 def do_pad(images, remainder):
4540 padding = array_ops.expand_dims(remainder, -1)
4541 padding = array_ops.pad(padding, [[1, 0], [1, 0]])
4542 return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images]
4544 mcs = []
4545 for k in range(len(power_factors)):
4546 with ops.name_scope(None, 'Scale%d' % k, imgs):
4547 if k > 0:
4548 # Avg pool takes rank 4 tensors. Flatten leading dimensions.
4549 flat_imgs = [
4550 array_ops.reshape(x, array_ops.concat([[-1], t], 0))
4551 for x, t in zip(imgs, tails)
4552 ]
4554 remainder = tails[0] % divisor_tensor
4555 need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0))
4556 # pylint: disable=cell-var-from-loop
4557 padded = tf_cond.cond(need_padding,
4558 lambda: do_pad(flat_imgs, remainder),
4559 lambda: flat_imgs)
4560 # pylint: enable=cell-var-from-loop
4562 downscaled = [
4563 nn_ops.avg_pool(
4564 x, ksize=divisor, strides=divisor, padding='VALID')
4565 for x in padded
4566 ]
4567 tails = [x[1:] for x in array_ops.shape_n(downscaled)]
4568 imgs = [
4569 array_ops.reshape(x, array_ops.concat([h, t], 0))
4570 for x, h, t in zip(downscaled, heads, tails)
4571 ]
4573 # Overwrite previous ssim value since we only need the last one.
4574 ssim_per_channel, cs = _ssim_per_channel(
4575 *imgs,
4576 max_val=max_val,
4577 filter_size=filter_size,
4578 filter_sigma=filter_sigma,
4579 k1=k1,
4580 k2=k2)
4581 mcs.append(nn_ops.relu(cs))
4583 # Remove the cs score for the last scale. In the MS-SSIM calculation,
4584 # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
4585    mcs.pop()
4586 mcs_and_ssim = array_ops_stack.stack(
4587 mcs + [nn_ops.relu(ssim_per_channel)], axis=-1)
4588 # Take weighted geometric mean across the scale axis.
4589 ms_ssim = math_ops.reduce_prod(
4590 math_ops.pow(mcs_and_ssim, power_factors), [-1])
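    # That is, ms_ssim = prod_{j<M} cs_j ** w_j * ssim_M ** w_M: the weighted
    # geometric mean of the contrast-structure terms with the full SSIM of the
    # coarsest scale (Wang et al.).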
4592 return math_ops.reduce_mean(ms_ssim, [-1]) # Avg over color channels.
4595@tf_export('image.image_gradients')
4596@dispatch.add_dispatch_support
4597def image_gradients(image):
4598 """Returns image gradients (dy, dx) for each color channel.
4600 Both output tensors have the same shape as the input: [batch_size, h, w,
4601 d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in
4602 location (x, y). That means that dy will always have zeros in the last row,
4603 and dx will always have zeros in the last column.
4605 Usage Example:
4606 ```python
4607 BATCH_SIZE = 1
4608 IMAGE_HEIGHT = 5
4609 IMAGE_WIDTH = 5
4610 CHANNELS = 1
4611 image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
4612 delta=1, dtype=tf.float32),
4613 shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
4614 dy, dx = tf.image.image_gradients(image)
4615 print(image[0, :,:,0])
4616 tf.Tensor(
4617 [[ 0. 1. 2. 3. 4.]
4618 [ 5. 6. 7. 8. 9.]
4619 [10. 11. 12. 13. 14.]
4620 [15. 16. 17. 18. 19.]
4621 [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32)
4622 print(dy[0, :,:,0])
4623 tf.Tensor(
4624 [[5. 5. 5. 5. 5.]
4625 [5. 5. 5. 5. 5.]
4626 [5. 5. 5. 5. 5.]
4627 [5. 5. 5. 5. 5.]
4628 [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32)
4629 print(dx[0, :,:,0])
4630 tf.Tensor(
4631 [[1. 1. 1. 1. 0.]
4632 [1. 1. 1. 1. 0.]
4633 [1. 1. 1. 1. 0.]
4634 [1. 1. 1. 1. 0.]
4635 [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32)
4636 ```
4638 Args:
4639 image: Tensor with shape [batch_size, h, w, d].
4641 Returns:
4642 Pair of tensors (dy, dx) holding the vertical and horizontal image
4643 gradients (1-step finite difference).
4645 Raises:
4646 ValueError: If `image` is not a 4D tensor.
4647 """
4648 if image.get_shape().ndims != 4:
4649 raise ValueError('image_gradients expects a 4D tensor '
4650 '[batch_size, h, w, d], not {}.'.format(image.get_shape()))
4651 image_shape = array_ops.shape(image)
4652 batch_size, height, width, depth = array_ops_stack.unstack(image_shape)
4653 dy = image[:, 1:, :, :] - image[:, :-1, :, :]
4654 dx = image[:, :, 1:, :] - image[:, :, :-1, :]
4656 # Return tensors with same size as original image by concatenating
4657 # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y).
4658 shape = array_ops_stack.stack([batch_size, 1, width, depth])
4659 dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1)
4660 dy = array_ops.reshape(dy, image_shape)
4662 shape = array_ops_stack.stack([batch_size, height, 1, depth])
4663 dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2)
4664 dx = array_ops.reshape(dx, image_shape)
4666 return dy, dx
4669@tf_export('image.sobel_edges')
4670@dispatch.add_dispatch_support
4671def sobel_edges(image):
4672 """Returns a tensor holding Sobel edge maps.
4674 Example usage:
4676 For general usage, `image` would be loaded from a file as below:
4678 ```python
4679 image_bytes = tf.io.read_file(path_to_image_file)
4680 image = tf.image.decode_image(image_bytes)
4681 image = tf.cast(image, tf.float32)
4682 image = tf.expand_dims(image, 0)
4683 ```
4684 But for demo purposes, we are using randomly generated values for `image`:
4686 >>> image = tf.random.uniform(
4687 ... maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32)
4688 >>> sobel = tf.image.sobel_edges(image)
4689 >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction
4690 >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction
4692 For displaying the sobel results, PIL's [Image Module](
4693 https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used:
4695 ```python
4696 # Display edge maps for the first channel (at index 0)
4697 Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show()
4698 Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show()
4699 ```
4701 Args:
4702 image: Image tensor with shape [batch_size, h, w, d] and type float32 or
4703 float64. The image(s) must be 2x2 or larger.
4705 Returns:
4706 Tensor holding edge maps for each channel. Returns a tensor with shape
4707 [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]],
4708 [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter.
4709 """
4710 # Define vertical and horizontal Sobel filters.
4711 static_image_shape = image.get_shape()
4712 image_shape = array_ops.shape(image)
4713 kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]],
4714 [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]]
4715 num_kernels = len(kernels)
4716 kernels = np.transpose(np.asarray(kernels), (1, 2, 0))
4717 kernels = np.expand_dims(kernels, -2)
4718 kernels_tf = constant_op.constant(kernels, dtype=image.dtype)
4720 kernels_tf = array_ops.tile(
4721 kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters')
4723 # Use depth-wise convolution to calculate edge maps per channel.
4724 pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]]
4725 padded = array_ops.pad(image, pad_sizes, mode='REFLECT')
4727 # Output tensor has shape [batch_size, h, w, d * num_kernels].
4728 strides = [1, 1, 1, 1]
4729 output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID')
4731 # Reshape to [batch_size, h, w, d, num_kernels].
4732 shape = array_ops.concat([image_shape, [num_kernels]], 0)
4733 output = array_ops.reshape(output, shape=shape)
4734 output.set_shape(static_image_shape.concatenate([num_kernels]))
4735 return output
4738def resize_bicubic(images,
4739 size,
4740 align_corners=False,
4741 name=None,
4742 half_pixel_centers=False):
4743 return gen_image_ops.resize_bicubic(
4744 images=images,
4745 size=size,
4746 align_corners=align_corners,
4747 half_pixel_centers=half_pixel_centers,
4748 name=name)
4751def resize_bilinear(images,
4752 size,
4753 align_corners=False,
4754 name=None,
4755 half_pixel_centers=False):
4756 return gen_image_ops.resize_bilinear(
4757 images=images,
4758 size=size,
4759 align_corners=align_corners,
4760 half_pixel_centers=half_pixel_centers,
4761 name=name)
4764def resize_nearest_neighbor(images,
4765 size,
4766 align_corners=False,
4767 name=None,
4768 half_pixel_centers=False):
4769 return gen_image_ops.resize_nearest_neighbor(
4770 images=images,
4771 size=size,
4772 align_corners=align_corners,
4773 half_pixel_centers=half_pixel_centers,
4774 name=name)
4777resize_area_deprecation = deprecation.deprecated(
4778 date=None,
4779 instructions=(
4780 'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.'))
4781tf_export(v1=['image.resize_area'])(
4782 resize_area_deprecation(
4783 dispatch.add_dispatch_support(gen_image_ops.resize_area)))
4785resize_bicubic_deprecation = deprecation.deprecated(
4786 date=None,
4787 instructions=(
4788 'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.'))
4789tf_export(v1=['image.resize_bicubic'])(
4790 dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic)))
4792resize_bilinear_deprecation = deprecation.deprecated(
4793 date=None,
4794 instructions=(
4795 'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.'))
4796tf_export(v1=['image.resize_bilinear'])(
4797 dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear)))
4799resize_nearest_neighbor_deprecation = deprecation.deprecated(
4800 date=None,
4801 instructions=(
4802 'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` '
4803 'instead.'))
4804tf_export(v1=['image.resize_nearest_neighbor'])(
4805 dispatch.add_dispatch_support(
4806 resize_nearest_neighbor_deprecation(resize_nearest_neighbor)))
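# For reference, the replacement suggested by the deprecation notices above is
# `tf.image.resize` with an explicit method, e.g. (an illustrative sketch, not
# part of the original module):
#
#   resized = tf.image.resize(images, [new_height, new_width],
#                             method=tf.image.ResizeMethod.BILINEAR)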
4809@tf_export('image.crop_and_resize', v1=[])
4810@dispatch.add_dispatch_support
4811def crop_and_resize_v2(image,
4812 boxes,
4813 box_indices,
4814 crop_size,
4815 method='bilinear',
4816                        extrapolation_value=0.0,
4817 name=None):
4818 """Extracts crops from the input image tensor and resizes them.
4820 Extracts crops from the input image tensor and resizes them using bilinear
4821 sampling or nearest neighbor sampling (possibly with aspect ratio change) to a
4822 common output size specified by `crop_size`. This is more general than the
4823 `crop_to_bounding_box` op which extracts a fixed size slice from the input
4824 image and does not allow resizing or aspect ratio change. The crops occur
4825 first and then the resize.
4827 Returns a tensor with `crops` from the input `image` at positions defined at
4828 the bounding box locations in `boxes`. The cropped boxes are all resized (with
4829 bilinear or nearest neighbor interpolation) to a fixed
4830 `size = [crop_height, crop_width]`. The result is a 4-D tensor
4831 `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
4832 In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
4833 results to using `tf.compat.v1.image.resize_bilinear()` or
4834  `tf.compat.v1.image.resize_nearest_neighbor()` (depending on the `method`
4835 argument) with
4836 `align_corners=True`.
4838 Args:
4839 image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
4840 Both `image_height` and `image_width` need to be positive.
4841 boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
4842 specifies the coordinates of a box in the `box_ind[i]` image and is
4843 specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
4844 coordinate value of `y` is mapped to the image coordinate at `y *
4845      (image_height - 1)`, so the `[0, 1]` interval of normalized image
4846 height is mapped to `[0, image_height - 1]` in image height coordinates.
4847 We do allow `y1` > `y2`, in which case the sampled crop is an up-down
4848 flipped version of the original image. The width dimension is treated
4849 similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
4850 in which case we use `extrapolation_value` to extrapolate the input image
4851 values.
4852 box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
4853 batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box
4854 refers to.
4855 crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
4856 All cropped image patches are resized to this size. The aspect ratio of
4857 the image content is not preserved. Both `crop_height` and `crop_width`
4858 need to be positive.
4859 method: An optional string specifying the sampling method for resizing. It
4860      can be either `"bilinear"` or `"nearest"` and defaults to `"bilinear"`.
4861 Currently two sampling methods are supported: Bilinear and Nearest
4862 Neighbor.
4863 extrapolation_value: An optional `float`. Defaults to `0.0`. Value used for
4864 extrapolation, when applicable.
4865 name: A name for the operation (optional).
4867 Returns:
4868 A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
4870 Usage example:
4872 >>> BATCH_SIZE = 1
4873 >>> NUM_BOXES = 5
4874 >>> IMAGE_HEIGHT = 256
4875 >>> IMAGE_WIDTH = 256
4876 >>> CHANNELS = 3
4877 >>> CROP_SIZE = (24, 24)
4879 >>> image = tf.random.normal(shape=(
4880 ... BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS) )
4881 >>> boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
4882 >>> box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0,
4883 ... maxval=BATCH_SIZE, dtype=tf.int32)
4884 >>> output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
4885 >>> output.shape
4886 TensorShape([5, 24, 24, 3])
4888 Example with linear interpolation:
4890 >>> image = np.arange(0, 18, 2).astype('float32').reshape(3, 3)
4891 >>> result = tf.image.crop_and_resize(
4892 ... image[None, :, :, None],
4893 ... np.asarray([[0.5,0.5,1,1]]), [0], [3, 3], method='bilinear')
4894 >>> result[0][:, :, 0]
4895 <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
4896 array([[ 8., 9., 10.],
4897 [11., 12., 13.],
4898 [14., 15., 16.]], dtype=float32)>
4900 Example with nearest interpolation:
4902 >>> image = np.arange(0, 18, 2).astype('float32').reshape(3, 3)
4903 >>> result = tf.image.crop_and_resize(
4904 ... image[None, :, :, None],
4905 ... np.asarray([[0.5,0.5,1,1]]), [0], [3, 3], method='nearest')
4906 >>> result[0][:, :, 0]
4907 <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
4908 array([[ 8., 10., 10.],
4909 [14., 16., 16.],
4910 [14., 16., 16.]], dtype=float32)>
4913 """
4914 return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size,
4915 method, extrapolation_value, name)
4918@tf_export(v1=['image.crop_and_resize'])
4919@dispatch.add_dispatch_support
4920@deprecation.deprecated_args(None,
4921 'box_ind is deprecated, use box_indices instead',
4922 'box_ind')
4923def crop_and_resize_v1( # pylint: disable=missing-docstring
4924 image,
4925 boxes,
4926 box_ind=None,
4927 crop_size=None,
4928 method='bilinear',
4929 extrapolation_value=0,
4930 name=None,
4931 box_indices=None):
4932 box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices,
4933 'box_ind', box_ind)
4934 return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method,
4935 extrapolation_value, name)
4938crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__
4941@tf_export(v1=['image.extract_glimpse'])
4942@dispatch.add_dispatch_support
4943def extract_glimpse(
4944 input, # pylint: disable=redefined-builtin
4945 size,
4946 offsets,
4947 centered=True,
4948 normalized=True,
4949 uniform_noise=True,
4950 name=None):
4951 """Extracts a glimpse from the input tensor.
4953 Returns a set of windows called glimpses extracted at location
4954  `offsets` from the input tensor. If a window only partially
4955  overlaps the input, the non-overlapping areas will be filled with
4956 random noise.
4958 The result is a 4-D tensor of shape `[batch_size, glimpse_height,
4959 glimpse_width, channels]`. The channels and batch dimensions are the
4960 same as that of the input tensor. The height and width of the output
4961 windows are specified in the `size` parameter.
4963  The arguments `normalized` and `centered` control how the windows are built:
4965 * If the coordinates are normalized but not centered, 0.0 and 1.0
4966 correspond to the minimum and maximum of each height and width
4967 dimension.
4968 * If the coordinates are both normalized and centered, they range from
4969 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
4970 left corner, the lower right corner is located at (1.0, 1.0) and the
4971 center is at (0, 0).
4972 * If the coordinates are not normalized they are interpreted as
4973 numbers of pixels.
4975 Usage Example:
4977 >>> x = [[[[0.0],
4978 ... [1.0],
4979 ... [2.0]],
4980 ... [[3.0],
4981 ... [4.0],
4982 ... [5.0]],
4983 ... [[6.0],
4984 ... [7.0],
4985 ... [8.0]]]]
4986 >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
4987 ... centered=False, normalized=False)
4988 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
4989 array([[[[0.],
4990 [1.]],
4991 [[3.],
4992 [4.]]]], dtype=float32)>
4994 Args:
4995 input: A `Tensor` of type `float32`. A 4-D float tensor of shape
4996 `[batch_size, height, width, channels]`.
4997 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
4998 size of the glimpses to extract. The glimpse height must be specified
4999      first, followed by the glimpse width.
5000    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
5001 `[batch_size, 2]` containing the y, x locations of the center of each
5002 window.
5003    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
5004      coordinates are centered relative to the image, in which case the (0, 0)
5005      offset is relative to the center of the input images. If false, the (0, 0)
5006      offset corresponds to the upper left corner of the input images.
5007    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
5008      coordinates are normalized.
5009    uniform_noise: An optional `bool`. Defaults to `True`. Indicates if the
5010      noise should be generated using a uniform distribution or a Gaussian
5011      distribution.
5012 name: A name for the operation (optional).
5014 Returns:
5015 A `Tensor` of type `float32`.
5016 """
5017 return gen_image_ops.extract_glimpse(
5018 input=input,
5019 size=size,
5020 offsets=offsets,
5021 centered=centered,
5022 normalized=normalized,
5023 uniform_noise=uniform_noise,
5024 name=name)
5027@tf_export('image.extract_glimpse', v1=[])
5028@dispatch.add_dispatch_support
5029def extract_glimpse_v2(
5030 input, # pylint: disable=redefined-builtin
5031 size,
5032 offsets,
5033 centered=True,
5034 normalized=True,
5035 noise='uniform',
5036 name=None):
5037 """Extracts a glimpse from the input tensor.
5039 Returns a set of windows called glimpses extracted at location
5040  `offsets` from the input tensor. If a window only partially
5041  overlaps the input, the non-overlapping areas will be filled with
5042 random noise.
5044 The result is a 4-D tensor of shape `[batch_size, glimpse_height,
5045 glimpse_width, channels]`. The channels and batch dimensions are the
5046 same as that of the input tensor. The height and width of the output
5047 windows are specified in the `size` parameter.
5049  The arguments `normalized` and `centered` control how the windows are built:
5051 * If the coordinates are normalized but not centered, 0.0 and 1.0
5052 correspond to the minimum and maximum of each height and width
5053 dimension.
5054 * If the coordinates are both normalized and centered, they range from
5055 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
5056 left corner, the lower right corner is located at (1.0, 1.0) and the
5057 center is at (0, 0).
5058 * If the coordinates are not normalized they are interpreted as
5059 numbers of pixels.
5061 Usage Example:
5063 >>> x = [[[[0.0],
5064 ... [1.0],
5065 ... [2.0]],
5066 ... [[3.0],
5067 ... [4.0],
5068 ... [5.0]],
5069 ... [[6.0],
5070 ... [7.0],
5071 ... [8.0]]]]
5072 >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
5073 ... centered=False, normalized=False)
5074 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
5075 array([[[[4.],
5076 [5.]],
5077 [[7.],
5078 [8.]]]], dtype=float32)>
5080 Args:
5081 input: A `Tensor` of type `float32`. A 4-D float tensor of shape
5082 `[batch_size, height, width, channels]`.
5083 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
5084 size of the glimpses to extract. The glimpse height must be specified
5085      first, followed by the glimpse width.
5086    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
5087 `[batch_size, 2]` containing the y, x locations of the center of each
5088 window.
5089    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
5090      coordinates are centered relative to the image, in which case the (0, 0)
5091      offset is relative to the center of the input images. If false, the (0, 0)
5092      offset corresponds to the upper left corner of the input images.
5093    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
5094      coordinates are normalized.
5095    noise: An optional `string`. Defaults to `uniform`. Indicates if the noise
5096      should be `uniform` (uniform distribution), `gaussian` (Gaussian
5097      distribution), or `zero` (zero padding).
5098 name: A name for the operation (optional).
5100 Returns:
5101 A `Tensor` of type `float32`.
5102 """
5103 return gen_image_ops.extract_glimpse_v2(
5104 input=input,
5105 size=size,
5106 offsets=offsets,
5107 centered=centered,
5108 normalized=normalized,
5109 noise=noise,
5110 uniform_noise=False,
5111 name=name)
5114@tf_export('image.combined_non_max_suppression')
5115@dispatch.add_dispatch_support
5116def combined_non_max_suppression(boxes,
5117 scores,
5118 max_output_size_per_class,
5119 max_total_size,
5120 iou_threshold=0.5,
5121 score_threshold=float('-inf'),
5122 pad_per_class=False,
5123 clip_boxes=True,
5124 name=None):
5125 """Greedily selects a subset of bounding boxes in descending order of score.
5127 This operation performs non_max_suppression on the inputs per batch, across
5128 all classes.
5129 Prunes away boxes that have high intersection-over-union (IOU) overlap
5130 with previously selected boxes. Bounding boxes are supplied as
5131 [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
5132 diagonal pair of box corners and the coordinates can be provided as normalized
5133 (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm
5134 is agnostic to where the origin is in the coordinate system. Also note that
5135 this algorithm is invariant to orthogonal transformations and translations
5136  of the coordinate system; thus translations or reflections of the coordinate
5137 system result in the same boxes being selected by the algorithm.
5138 The output of this operation is the final boxes, scores and classes tensor
5139 returned after performing non_max_suppression.
5141 Args:
5142 boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
5143 is 1 then same boxes are used for all classes otherwise, if `q` is equal
5144 to number of classes, class-specific boxes are used.
5145 scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
5146 representing a single score corresponding to each box (each row of boxes).
5147 max_output_size_per_class: A scalar integer `Tensor` representing the
5148      maximum number of boxes to be selected by non-max suppression per class.
5149    max_total_size: An int32 scalar representing the maximum number of boxes retained
5150 over all classes. Note that setting this value to a large number may
5151 result in OOM error depending on the system workload.
5152 iou_threshold: A float representing the threshold for deciding whether boxes
5153 overlap too much with respect to IOU.
5154 score_threshold: A float representing the threshold for deciding when to
5155 remove boxes based on score.
5156 pad_per_class: If false, the output nmsed boxes, scores and classes are
5157 padded/clipped to `max_total_size`. If true, the output nmsed boxes,
5158 scores and classes are padded to be of length
5159 `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in
5160 which case it is clipped to `max_total_size`. Defaults to false.
5161 clip_boxes: If true, the coordinates of output nmsed boxes will be clipped
5162      to [0, 1]. If false, output the box coordinates as they are. Defaults to
5163 true.
5164 name: A name for the operation (optional).
5166 Returns:
5167 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
5168 containing the non-max suppressed boxes.
5169 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
5170 the scores for the boxes.
5171 'nmsed_classes': A [batch_size, max_detections] float32 tensor
5172 containing the class for boxes.
5173 'valid_detections': A [batch_size] int32 tensor indicating the number of
5174 valid detections per batch item. Only the top valid_detections[i] entries
5175 in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
5176 entries are zero paddings.
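
  A minimal usage sketch with random inputs (shapes are illustrative
  assumptions; `q=1` means the same boxes are shared across the 3 classes):

  ```python
  boxes = tf.random.uniform([1, 10, 1, 4])
  scores = tf.random.uniform([1, 10, 3])
  nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
      tf.image.combined_non_max_suppression(
          boxes, scores, max_output_size_per_class=5, max_total_size=10))
  ```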
5177 """
5178 with ops.name_scope(name, 'combined_non_max_suppression'):
5179 iou_threshold = ops.convert_to_tensor(
5180 iou_threshold, dtype=dtypes.float32, name='iou_threshold')
5181 score_threshold = ops.convert_to_tensor(
5182 score_threshold, dtype=dtypes.float32, name='score_threshold')
5184 # Convert `max_total_size` to tensor *without* setting the `dtype` param.
5185 # This allows us to catch `int32` overflow case with `max_total_size`
5186 # whose expected dtype is `int32` by the op registration. Any number within
5187 # `int32` will get converted to `int32` tensor. Anything larger will get
5188 # converted to `int64`. Passing in `int64` for `max_total_size` to the op
5189 # will throw dtype mismatch exception.
5190 # TODO(b/173251596): Once there is a more general solution to warn against
5191 # int overflow conversions, revisit this check.
5192 max_total_size = ops.convert_to_tensor(max_total_size)
5194 return gen_image_ops.combined_non_max_suppression(
5195 boxes, scores, max_output_size_per_class, max_total_size, iou_threshold,
5196 score_threshold, pad_per_class, clip_boxes)
5199def _bbox_overlap(boxes_a, boxes_b):
5200   """Calculates the overlap (IoU, intersection over union) between boxes_a and boxes_b.
5202 Args:
5203 boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of
5204 boxes per image. The last dimension is the pixel coordinates in
5205 [ymin, xmin, ymax, xmax] form.
5206 boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of
5207 boxes. The last dimension is the pixel coordinates in
5208 [ymin, xmin, ymax, xmax] form.
5209 Returns:
5210     intersection_over_union: a tensor with a shape of [batch_size, N, M],
5211       representing the ratio of intersection area over union area (IoU) between
5212       the two sets of boxes.
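
     For example (a worked, illustrative case): with the single box
     `[0., 0., 2., 2.]` in boxes_a and `[1., 1., 3., 3.]` in boxes_b, the
     intersection area is 1 and the union area is 4 + 4 - 1 = 7, so the
     returned IoU is approximately 1 / 7 ~= 0.143 (marginally less due to
     EPSILON).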
5213 """
5214 with ops.name_scope('bbox_overlap'):
5215 a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split(
5216 value=boxes_a, num_or_size_splits=4, axis=2)
5217 b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split(
5218 value=boxes_b, num_or_size_splits=4, axis=2)
5220 # Calculates the intersection area.
5221 i_xmin = math_ops.maximum(
5222 a_x_min, array_ops.transpose(b_x_min, [0, 2, 1]))
5223 i_xmax = math_ops.minimum(
5224 a_x_max, array_ops.transpose(b_x_max, [0, 2, 1]))
5225 i_ymin = math_ops.maximum(
5226 a_y_min, array_ops.transpose(b_y_min, [0, 2, 1]))
5227 i_ymax = math_ops.minimum(
5228 a_y_max, array_ops.transpose(b_y_max, [0, 2, 1]))
5229 i_area = math_ops.maximum(
5230 (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0)
5232 # Calculates the union area.
5233 a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min)
5234 b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min)
5235 EPSILON = 1e-8
5236 # Adds a small epsilon to avoid divide-by-zero.
5237 u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON
5239 # Calculates IoU.
5240 intersection_over_union = i_area / u_area
5242 return intersection_over_union
5245def _self_suppression(iou, _, iou_sum, iou_threshold):
5246 """Suppress boxes in the same tile.
5248 Compute boxes that cannot be suppressed by others (i.e.,
5249 can_suppress_others), and then use them to suppress boxes in the same tile.
5251 Args:
5252 iou: a tensor of shape [batch_size, num_boxes_with_padding] representing
5253 intersection over union.
5254     iou_sum: a tensor of shape [batch_size] with the per-image IoU sums.
5255 iou_threshold: a scalar tensor.
5257 Returns:
5258 iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding].
5259     iou_diff: a boolean scalar tensor representing whether any box is
5260       suppressed in this step.
5261     iou_sum_new: a tensor of shape [batch_size] that represents
5262       the IoU sum after suppression.
5263 iou_threshold: a scalar tensor.
5264 """
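  # A note on usage (descriptive, matching the call in _suppression_loop_body):
  # this function serves as a `while_loop` body whose second return value
  # doubles as the loop condition, so iteration stops once no image's IoU sum
  # drops by more than `iou_threshold` in a step.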
5265 batch_size = array_ops.shape(iou)[0]
5266 can_suppress_others = math_ops.cast(
5267 array_ops.reshape(
5268 math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]),
5269 iou.dtype)
5270 iou_after_suppression = array_ops.reshape(
5271 math_ops.cast(
5272 math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold,
5273 iou.dtype),
5274 [batch_size, -1, 1]) * iou
5275 iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2])
5276 return [
5277 iou_after_suppression,
5278 math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new,
5279 iou_threshold
5280 ]
5283def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size):
5284 """Suppress boxes between different tiles.
5286 Args:
5287 boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4]
5288 box_slice: a tensor of shape [batch_size, tile_size, 4]
5289 iou_threshold: a scalar tensor
5290 inner_idx: a scalar tensor representing the tile index of the tile
5291      that is used to suppress box_slice
5292 tile_size: an integer representing the number of boxes in a tile
5294 Returns:
5295     boxes: the input boxes, unchanged.
5296     box_slice_after_suppression: box_slice after suppression.
5297     iou_threshold: unchanged. The incremented tile index, inner_idx + 1, is also returned.
5298 """
5299 batch_size = array_ops.shape(boxes)[0]
5300 new_slice = array_ops.slice(
5301 boxes, [0, inner_idx * tile_size, 0],
5302 [batch_size, tile_size, 4])
5303 iou = _bbox_overlap(new_slice, box_slice)
5304 box_slice_after_suppression = array_ops.expand_dims(
5305 math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]),
5306 box_slice.dtype),
5307 2) * box_slice
5308 return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1
5311def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size):
5312 """Process boxes in the range [idx*tile_size, (idx+1)*tile_size).
5314 Args:
5315 boxes: a tensor with a shape of [batch_size, anchors, 4].
5316 iou_threshold: a float representing the threshold for deciding whether boxes
5317 overlap too much with respect to IOU.
5318     output_size: an int32 tensor of size [batch_size] representing the number
5319       of selected boxes for each batch item.
5320     idx: an integer scalar representing the induction variable.
5321 tile_size: an integer representing the number of boxes in a tile
5323 Returns:
5324 boxes: updated boxes.
5325 iou_threshold: pass down iou_threshold to the next iteration.
5326 output_size: the updated output_size.
5327 idx: the updated induction variable.
5328 """
5329 with ops.name_scope('suppression_loop_body'):
5330 num_tiles = array_ops.shape(boxes)[1] // tile_size
5331 batch_size = array_ops.shape(boxes)[0]
5333 def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx):
5334 return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx,
5335 tile_size)
5337 # Iterates over tiles that can possibly suppress the current tile.
5338 box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0],
5339 [batch_size, tile_size, 4])
5340 _, box_slice, _, _ = while_loop.while_loop(
5341 lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
5342 cross_suppression_func,
5343 [boxes, box_slice, iou_threshold,
5344 constant_op.constant(0)])
5346 # Iterates over the current tile to compute self-suppression.
5347 iou = _bbox_overlap(box_slice, box_slice)
5348 mask = array_ops.expand_dims(
5349 array_ops.reshape(
5350 math_ops.range(tile_size), [1, -1]) > array_ops.reshape(
5351 math_ops.range(tile_size), [-1, 1]), 0)
5352 iou *= math_ops.cast(
5353 math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype)
5354 suppressed_iou, _, _, _ = while_loop.while_loop(
5355 lambda _iou, loop_condition, _iou_sum, _: loop_condition,
5356 _self_suppression, [
5357 iou,
5358 constant_op.constant(True),
5359 math_ops.reduce_sum(iou, [1, 2]), iou_threshold
5360 ])
5361 suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0
5362 box_slice *= array_ops.expand_dims(
5363 1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2)
5365 # Uses box_slice to update the input boxes.
5366 mask = array_ops.reshape(
5367 math_ops.cast(
5368 math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype),
5369 [1, -1, 1, 1])
5370 boxes = array_ops.tile(array_ops.expand_dims(
5371 box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape(
5372 boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask)
5373 boxes = array_ops.reshape(boxes, [batch_size, -1, 4])
5375 # Updates output_size.
5376 output_size += math_ops.reduce_sum(
5377 math_ops.cast(
5378 math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1])
5379 return boxes, iou_threshold, output_size, idx + 1
5382@tf_export('image.non_max_suppression_padded')
5383@dispatch.add_dispatch_support
5384def non_max_suppression_padded(boxes,
5385 scores,
5386 max_output_size,
5387 iou_threshold=0.5,
5388 score_threshold=float('-inf'),
5389 pad_to_max_output_size=False,
5390 name=None,
5391 sorted_input=False,
5392 canonicalized_coordinates=False,
5393 tile_size=512):
5394 """Greedily selects a subset of bounding boxes in descending order of score.
5396  Performs an operation algorithmically equivalent to tf.image.non_max_suppression,
5397 with the addition of an optional parameter which zero-pads the output to
5398 be of size `max_output_size`.
5399 The output of this operation is a tuple containing the set of integers
5400 indexing into the input collection of bounding boxes representing the selected
5401 boxes and the number of valid indices in the index set. The bounding box
5402 coordinates corresponding to the selected indices can then be obtained using
5403 the `tf.slice` and `tf.gather` operations. For example:
5404 ```python
5405 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5406 boxes, scores, max_output_size, iou_threshold,
5407 score_threshold, pad_to_max_output_size=True)
5408 selected_indices = tf.slice(
5409 selected_indices_padded, tf.constant([0]), num_valid)
5410 selected_boxes = tf.gather(boxes, selected_indices)
5411 ```
5413 Args:
5414 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5415 Dimensions except the last two are batch dimensions.
5416 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5417 max_output_size: a scalar integer `Tensor` representing the maximum number
5418 of boxes to be selected by non max suppression. Note that setting this
5419 value to a large number may result in OOM error depending on the system
5420 workload.
5421 iou_threshold: a float representing the threshold for deciding whether boxes
5422 overlap too much with respect to IoU (intersection over union).
5423 score_threshold: a float representing the threshold for box scores. Boxes
5424 with a score that is not larger than this threshold will be suppressed.
5425 pad_to_max_output_size: whether to pad the output idx to max_output_size.
5426 Must be set to True when the input is a batch of images.
5427 name: name of operation.
5428 sorted_input: a boolean indicating whether the input boxes and scores
5429 are sorted in descending order by the score.
5430 canonicalized_coordinates: if box coordinates are given as
5431      `[y_min, x_min, y_max, x_max]`, setting to True eliminates redundant
5432 computation to canonicalize box coordinates.
5433 tile_size: an integer representing the number of boxes in a tile, i.e.,
5434 the maximum number of boxes per image that can be used to suppress other
5435 boxes in parallel; larger tile_size means larger parallelism and
5436 potentially more redundant work.
5437 Returns:
5438 idx: a tensor with a shape of [..., num_boxes] representing the
5439 indices selected by non-max suppression. The leading dimensions
5440 are the batch dimensions of the input boxes. All numbers are within
5441 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5442 indices (i.e., idx[i][:num_valid[i]]) are valid.
5443 num_valid: a tensor of rank 0 or higher with a shape of [...]
5444 representing the number of valid indices in idx. Its dimensions are the
5445 batch dimensions of the input boxes.
5446 Raises:
5447     ValueError: When `pad_to_max_output_size` is set to False for batched input.
5448 """
5449 with ops.name_scope(name, 'non_max_suppression_padded'):
5450 if not pad_to_max_output_size:
5451 # pad_to_max_output_size may be set to False only when the shape of
5452 # boxes is [num_boxes, 4], i.e., a single image. We make best effort to
5453 # detect violations at compile time. If `boxes` does not have a static
5454 # rank, the check allows computation to proceed.
5455 if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2:
5456 raise ValueError("'pad_to_max_output_size' (value {}) must be True for "
5457 'batched input'.format(pad_to_max_output_size))
5458 if name is None:
5459 name = ''
5460 idx, num_valid = non_max_suppression_padded_v2(
5461 boxes, scores, max_output_size, iou_threshold, score_threshold,
5462 sorted_input, canonicalized_coordinates, tile_size)
5463 # def_function.function seems to lose shape information, so set it here.
5464 if not pad_to_max_output_size:
5465 idx = idx[0, :num_valid]
5466 else:
5467 batch_dims = array_ops.concat([
5468 array_ops.shape(boxes)[:-2],
5469 array_ops.expand_dims(max_output_size, 0)
5470 ], 0)
5471 idx = array_ops.reshape(idx, batch_dims)
5472 return idx, num_valid
5475# TODO(b/158709815): Improve performance regression due to
5476# def_function.function.
5477@def_function.function(
5478 experimental_implements='non_max_suppression_padded_v2')
5479def non_max_suppression_padded_v2(boxes,
5480 scores,
5481 max_output_size,
5482 iou_threshold=0.5,
5483 score_threshold=float('-inf'),
5484 sorted_input=False,
5485 canonicalized_coordinates=False,
5486 tile_size=512):
5487 """Non-maximum suppression.
5489 Prunes away boxes that have high intersection-over-union (IOU) overlap
5490 with previously selected boxes. Bounding boxes are supplied as
5491 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
5492 diagonal pair of box corners and the coordinates can be provided as normalized
5493 (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box
5494  coordinates are canonicalized to `[y_min, x_min, y_max, x_max]`,
5495  where `(y_min, x_min)` and `(y_max, x_max)` are the coordinates of the lower
5496  left and upper right corner. The caller may indicate that the input box
5497  coordinates are already canonicalized, to eliminate redundant work, by setting
5498  canonicalized_coordinates to `True`. Note that this algorithm is agnostic to
5499  where the origin is in the coordinate system. Note that this algorithm is
5500  invariant to orthogonal transformations and translations of the coordinate
5501  system; thus translations or reflections of the coordinate system result in
5502  the same boxes being selected by the algorithm.
5504 Similar to tf.image.non_max_suppression, non_max_suppression_padded
5505 implements hard NMS but can operate on a batch of images and improves
5506  performance by tiling the bounding boxes. non_max_suppression_padded should
5507  be preferred over tf.image.non_max_suppression when running on devices with
5508  abundant parallelism for higher computation speed. For soft NMS, refer to
5509 tf.image.non_max_suppression_with_scores.
5511 While a serial NMS algorithm iteratively uses the highest-scored unprocessed
5512 box to suppress boxes, this algorithm uses many boxes to suppress other boxes
5513 in parallel. The key idea is to partition boxes into tiles based on their
5514  score and to suppress boxes tile by tile, thus achieving parallelism within a
5515 tile. The tile size determines the degree of parallelism.
5517 In cross suppression (using boxes of tile A to suppress boxes of tile B),
5518 all boxes in A can independently suppress boxes in B.
5520 Self suppression (suppressing boxes of the same tile) needs to be iteratively
5521 applied until there's no more suppression. In each iteration, boxes that
5522 cannot be suppressed are used to suppress boxes in the same tile.
5524    boxes = boxes.pad_to_multiple_of(tile_size)
5525 num_tiles = len(boxes) // tile_size
5526 output_boxes = []
5527 for i in range(num_tiles):
5528 box_tile = boxes[i*tile_size : (i+1)*tile_size]
5529      for j in range(i):
5530        # in parallel suppress boxes in box_tile using boxes from suppressing_tile
5531        suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
5532        iou = _bbox_overlap(box_tile, suppressing_tile)
5533        # if a box is suppressed in iou, zero it out
5534        box_tile *= _update_boxes(iou)
5535      # Iteratively handle the diagonal tile.
5536      iou = _bbox_overlap(box_tile, box_tile)
5537 iou_changed = True
5538 while iou_changed:
5539 # boxes that are not suppressed by anything else
5540 suppressing_boxes = _get_suppressing_boxes(iou)
5541 # boxes that are suppressed by suppressing_boxes
5542 suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
5543 # clear iou to 0 for boxes that are suppressed, as they cannot be used
5544 # to suppress other boxes any more
5545 new_iou = _clear_iou(iou, suppressed_boxes)
5546 iou_changed = (new_iou != iou)
5547 iou = new_iou
5548 # remaining boxes that can still suppress others, are selected boxes.
5549 output_boxes.append(_get_suppressing_boxes(iou))
5550 if len(output_boxes) >= max_output_size:
5551 break
5553 Args:
5554 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5555 Dimensions except the last two are batch dimensions. The last dimension
5556 represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates
5557 on each dimension can be given in any order
5558 (see also `canonicalized_coordinates`) but must describe a box with
5559 a positive area.
5560 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5561 max_output_size: a scalar integer `Tensor` representing the maximum number
5562 of boxes to be selected by non max suppression.
5563 iou_threshold: a float representing the threshold for deciding whether boxes
5564 overlap too much with respect to IoU (intersection over union).
5565 score_threshold: a float representing the threshold for box scores. Boxes
5566 with a score that is not larger than this threshold will be suppressed.
5567 sorted_input: a boolean indicating whether the input boxes and scores
5568 are sorted in descending order by the score.
5569 canonicalized_coordinates: if box coordinates are given as
5570      `[y_min, x_min, y_max, x_max]`, setting to True eliminates redundant
5571 computation to canonicalize box coordinates.
5572 tile_size: an integer representing the number of boxes in a tile, i.e.,
5573 the maximum number of boxes per image that can be used to suppress other
5574 boxes in parallel; larger tile_size means larger parallelism and
5575 potentially more redundant work.
5576 Returns:
5577 idx: a tensor with a shape of [..., num_boxes] representing the
5578 indices selected by non-max suppression. The leading dimensions
5579 are the batch dimensions of the input boxes. All numbers are within
5580 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5581 indices (i.e., idx[i][:num_valid[i]]) are valid.
5582 num_valid: a tensor of rank 0 or higher with a shape of [...]
5583 representing the number of valid indices in idx. Its dimensions are the
5584 batch dimensions of the input boxes.
5585 Raises:
5586     ValueError: When `pad_to_max_output_size` is set to False for batched input.
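
  A minimal batched usage sketch (illustrative; the boxes are constructed so
  they are already canonicalized with positive area):

  ```python
  y1 = tf.random.uniform([2, 100, 1])
  x1 = tf.random.uniform([2, 100, 1])
  boxes = tf.concat([y1, x1, y1 + 0.1, x1 + 0.1], axis=-1)
  scores = tf.random.uniform([2, 100])
  idx, num_valid = non_max_suppression_padded_v2(
      boxes, scores, max_output_size=10, canonicalized_coordinates=True)
  ```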
5587 """
5588 def _sort_scores_and_boxes(scores, boxes):
5589    """Sort boxes based on their score from highest to lowest.
5591 Args:
5592 scores: a tensor with a shape of [batch_size, num_boxes] representing
5593 the scores of boxes.
5594 boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing
5595 the boxes.
5596 Returns:
5597 sorted_scores: a tensor with a shape of [batch_size, num_boxes]
5598 representing the sorted scores.
5599 sorted_boxes: a tensor representing the sorted boxes.
5600 sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes]
5601 representing the index of the scores in a sorted descending order.
5602 """
5603 with ops.name_scope('sort_scores_and_boxes'):
5604 sorted_scores_indices = sort_ops.argsort(
5605 scores, axis=1, direction='DESCENDING')
5606 sorted_scores = array_ops.gather(
5607 scores, sorted_scores_indices, axis=1, batch_dims=1
5608 )
5609 sorted_boxes = array_ops.gather(
5610 boxes, sorted_scores_indices, axis=1, batch_dims=1
5611 )
5612 return sorted_scores, sorted_boxes, sorted_scores_indices
5614 batch_dims = array_ops.shape(boxes)[:-2]
5615 num_boxes = array_ops.shape(boxes)[-2]
5616 boxes = array_ops.reshape(boxes, [-1, num_boxes, 4])
5617 scores = array_ops.reshape(scores, [-1, num_boxes])
5618 batch_size = array_ops.shape(boxes)[0]
5619 if score_threshold != float('-inf'):
5620 with ops.name_scope('filter_by_score'):
5621 score_mask = math_ops.cast(scores > score_threshold, scores.dtype)
5622 scores *= score_mask
5623 box_mask = array_ops.expand_dims(
5624 math_ops.cast(score_mask, boxes.dtype), 2)
5625 boxes *= box_mask
5627 if not canonicalized_coordinates:
5628 with ops.name_scope('canonicalize_coordinates'):
5629 y_1, x_1, y_2, x_2 = array_ops.split(
5630 value=boxes, num_or_size_splits=4, axis=2)
5631 y_1_is_min = math_ops.reduce_all(
5632 math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0]))
5633 y_min, y_max = tf_cond.cond(
5634 y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1))
5635 x_1_is_min = math_ops.reduce_all(
5636 math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0]))
5637 x_min, x_max = tf_cond.cond(
5638 x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1))
5639 boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2)
5640 # TODO(@bhack): https://github.com/tensorflow/tensorflow/issues/56089
5641 # this will be required after deprecation
5642 #else:
5643 # y_1, x_1, y_2, x_2 = array_ops.split(
5644 # value=boxes, num_or_size_splits=4, axis=2)
5646 if not sorted_input:
5647 scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes)
5648 else:
5649 # Default value required for Autograph.
5650 sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32)
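  # Pad the number of boxes up to the next multiple of `tile_size` (and to at
  # least `max_output_size`); e.g. 100 input boxes with tile_size=512 pad the
  # box dimension to 512.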
5652 pad = math_ops.cast(
5653 math_ops.ceil(
5654 math_ops.cast(
5655 math_ops.maximum(num_boxes, max_output_size), dtypes.float32) /
5656 math_ops.cast(tile_size, dtypes.float32)),
5657 dtypes.int32) * tile_size - num_boxes
5658 boxes = array_ops.pad(
5659 math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]])
5660 scores = array_ops.pad(
5661 math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]])
5662 num_boxes_after_padding = num_boxes + pad
5663 num_iterations = num_boxes_after_padding // tile_size
5664 def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
5665 return math_ops.logical_and(
5666 math_ops.reduce_min(output_size) < max_output_size,
5667 idx < num_iterations)
5669 def suppression_loop_body(boxes, iou_threshold, output_size, idx):
5670 return _suppression_loop_body(
5671 boxes, iou_threshold, output_size, idx, tile_size)
5673 selected_boxes, _, output_size, _ = while_loop.while_loop(
5674 _loop_cond,
5675 suppression_loop_body,
5676 [
5677 boxes, iou_threshold,
5678 array_ops.zeros([batch_size], dtypes.int32),
5679 constant_op.constant(0)
5680 ],
5681 shape_invariants=[
5682 tensor_shape.TensorShape([None, None, 4]),
5683 tensor_shape.TensorShape([]),
5684 tensor_shape.TensorShape([None]),
5685 tensor_shape.TensorShape([]),
5686 ],
5687 )
5688 num_valid = math_ops.minimum(output_size, max_output_size)
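  # Recover the indices of surviving boxes: a padded box "survives" when any
  # of its coordinates is still non-zero. Multiplying that 0/1 mask by a
  # descending ramp and taking top_k picks out the earliest surviving
  # positions; subtracting from num_boxes_after_padding converts the top_k
  # values back to indices.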
5689 idx = num_boxes_after_padding - math_ops.cast(
5690 nn_ops.top_k(
5691 math_ops.cast(math_ops.reduce_any(
5692 selected_boxes > 0, [2]), dtypes.int32) *
5693 array_ops.expand_dims(
5694 math_ops.range(num_boxes_after_padding, 0, -1), 0),
5695 max_output_size)[0], dtypes.int32)
5696 idx = math_ops.minimum(idx, num_boxes - 1)
5698 if not sorted_input:
5699 index_offsets = math_ops.range(batch_size) * num_boxes
5700 gather_idx = array_ops.reshape(
5701 idx + array_ops.expand_dims(index_offsets, 1), [-1])
5702 idx = array_ops.reshape(
5703 array_ops.gather(array_ops.reshape(sorted_indices, [-1]),
5704 gather_idx),
5705 [batch_size, -1])
5706 invalid_index = array_ops.zeros([batch_size, max_output_size],
5707 dtype=dtypes.int32)
5708 idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0)
5709 num_valid_expanded = array_ops.expand_dims(num_valid, 1)
5710 idx = array_ops.where(idx_index < num_valid_expanded,
5711 idx, invalid_index)
5713 num_valid = array_ops.reshape(num_valid, batch_dims)
5714 return idx, num_valid
5717def non_max_suppression_padded_v1(boxes,
5718 scores,
5719 max_output_size,
5720 iou_threshold=0.5,
5721 score_threshold=float('-inf'),
5722 pad_to_max_output_size=False,
5723 name=None):
5724 """Greedily selects a subset of bounding boxes in descending order of score.
5726  Performs an operation algorithmically equivalent to tf.image.non_max_suppression,
5727 with the addition of an optional parameter which zero-pads the output to
5728 be of size `max_output_size`.
5729 The output of this operation is a tuple containing the set of integers
5730 indexing into the input collection of bounding boxes representing the selected
5731 boxes and the number of valid indices in the index set. The bounding box
5732 coordinates corresponding to the selected indices can then be obtained using
5733 the `tf.slice` and `tf.gather` operations. For example:
5734 ```python
5735 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5736 boxes, scores, max_output_size, iou_threshold,
5737 score_threshold, pad_to_max_output_size=True)
5738 selected_indices = tf.slice(
5739 selected_indices_padded, tf.constant([0]), num_valid)
5740 selected_boxes = tf.gather(boxes, selected_indices)
5741 ```
5743 Args:
5744 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
5745 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
5746 score corresponding to each box (each row of boxes).
5747 max_output_size: A scalar integer `Tensor` representing the maximum number
5748 of boxes to be selected by non-max suppression.
5749 iou_threshold: A float representing the threshold for deciding whether boxes
5750 overlap too much with respect to IOU.
5751 score_threshold: A float representing the threshold for deciding when to
5752 remove boxes based on score.
5753 pad_to_max_output_size: bool. If True, size of `selected_indices` output is
5754 padded to `max_output_size`.
5755 name: A name for the operation (optional).
5757 Returns:
5758 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
5759 selected indices from the boxes tensor, where `M <= max_output_size`.
5760 valid_outputs: A scalar integer `Tensor` denoting how many elements in
5761 `selected_indices` are valid. Valid elements occur first, then padding.
5762 """
5763 with ops.name_scope(name, 'non_max_suppression_padded'):
5764 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
5765 score_threshold = ops.convert_to_tensor(
5766 score_threshold, name='score_threshold')
5767 return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size,
5768 iou_threshold, score_threshold,
5769 pad_to_max_output_size)
5772@tf_export('image.draw_bounding_boxes', v1=[])
5773@dispatch.add_dispatch_support
5774def draw_bounding_boxes_v2(images, boxes, colors, name=None):
5775 """Draw bounding boxes on a batch of images.
5777 Outputs a copy of `images` but draws on top of the pixels zero or more
5778  bounding boxes specified by the locations in `boxes`. The coordinates of
5779  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5780 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5781 and the height of the underlying image.
5783 For example, if an image is 100 x 200 pixels (height x width) and the bounding
5784 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5785 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5787 Parts of the bounding box may fall outside the image.
5789 Args:
5790 images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5791 4-D with shape `[batch, height, width, depth]`. A batch of images.
5792 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5793 num_bounding_boxes, 4]` containing bounding boxes.
5794 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5795 through for the boxes.
5796 name: A name for the operation (optional).
5798 Returns:
5799 A `Tensor`. Has the same type as `images`.
5801 Usage Example:
5803 >>> # create an empty image
5804 >>> img = tf.zeros([1, 3, 3, 3])
5805 >>> # draw a box around the image
5806 >>> box = np.array([0, 0, 1, 1])
5807 >>> boxes = box.reshape([1, 1, 4])
5808 >>> # alternate between red and blue
5809 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5810 >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5811 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5812 array([[[[1., 0., 0.],
5813 [1., 0., 0.],
5814 [1., 0., 0.]],
5815 [[1., 0., 0.],
5816 [0., 0., 0.],
5817 [1., 0., 0.]],
5818 [[1., 0., 0.],
5819 [1., 0., 0.],
5820 [1., 0., 0.]]]], dtype=float32)>
5821 """
5822 if colors is None:
5823 return gen_image_ops.draw_bounding_boxes(images, boxes, name)
5824 return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name)
5827@tf_export(v1=['image.draw_bounding_boxes'])
5828@dispatch.add_dispatch_support
5829def draw_bounding_boxes(images, boxes, name=None, colors=None):
5830 """Draw bounding boxes on a batch of images.
5832 Outputs a copy of `images` but draws on top of the pixels zero or more
5833  bounding boxes specified by the locations in `boxes`. The coordinates of
5834  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5835 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5836 and the height of the underlying image.
5838 For example, if an image is 100 x 200 pixels (height x width) and the bounding
5839 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5840 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5842 Parts of the bounding box may fall outside the image.
5844 Args:
5845 images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5846 4-D with shape `[batch, height, width, depth]`. A batch of images.
5847 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5848 num_bounding_boxes, 4]` containing bounding boxes.
5849 name: A name for the operation (optional).
5850 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5851 through for the boxes.
5853 Returns:
5854 A `Tensor`. Has the same type as `images`.
5856 Usage Example:
5858 >>> # create an empty image
5859 >>> img = tf.zeros([1, 3, 3, 3])
5860 >>> # draw a box around the image
5861 >>> box = np.array([0, 0, 1, 1])
5862 >>> boxes = box.reshape([1, 1, 4])
5863 >>> # alternate between red and blue
5864 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5865 >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5866 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5867 array([[[[1., 0., 0.],
5868 [1., 0., 0.],
5869 [1., 0., 0.]],
5870 [[1., 0., 0.],
5871 [0., 0., 0.],
5872 [1., 0., 0.]],
5873 [[1., 0., 0.],
5874 [1., 0., 0.],
5875 [1., 0., 0.]]]], dtype=float32)>
5876 """
5877 return draw_bounding_boxes_v2(images, boxes, colors, name)
5880@tf_export('image.generate_bounding_box_proposals')
5881@dispatch.add_dispatch_support
5882def generate_bounding_box_proposals(scores,
5883 bbox_deltas,
5884 image_info,
5885 anchors,
5886 nms_threshold=0.7,
5887 pre_nms_topn=6000,
5888 min_size=16,
5889 post_nms_topn=300,
5890 name=None):
5891 """Generate bounding box proposals from encoded bounding boxes.
5893 Args:
5894 scores: A 4-D float `Tensor` of shape
5895      `[num_images, height, width, num_anchors]` containing scores of
5896 the boxes for given anchors, can be unsorted.
5897 bbox_deltas: A 4-D float `Tensor` of shape
5898 `[num_images, height, width, 4 x num_anchors]` encoding boxes
5899 with respect to each anchor. Coordinates are given
5900 in the form `[dy, dx, dh, dw]`.
5901 image_info: A 2-D float `Tensor` of shape `[num_images, 5]`
5902      containing image information: height, width, and scale.
5903 anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]`
5904 describing the anchor boxes.
5905 Boxes are formatted in the form `[y1, x1, y2, x2]`.
5906    nms_threshold: A scalar float `Tensor` for the non-maximum-suppression
5907 threshold. Defaults to 0.7.
5908 pre_nms_topn: A scalar int `Tensor` for the number of
5909 top scoring boxes to be used as input. Defaults to 6000.
5910 min_size: A scalar float `Tensor`. Any box that has a smaller size
5911 than min_size will be discarded. Defaults to 16.
5912 post_nms_topn: An integer. Maximum number of rois in the output.
5913 name: A name for this operation (optional).
5915 Returns:
5916 rois: Region of interest boxes sorted by their scores.
5917    roi_probabilities: Scores of the ROI boxes in the `rois` tensor.
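
  A shape-level sketch (all values here are illustrative assumptions; the
  layout of the trailing `image_info` entries is a guess, and the underlying
  kernel may only be registered for GPU devices):

  ```python
  num_images, h, w, num_anchors = 1, 32, 32, 3
  scores = tf.random.uniform([num_images, h, w, num_anchors])
  bbox_deltas = tf.random.uniform([num_images, h, w, 4 * num_anchors])
  image_info = tf.random.uniform([num_images, 5])
  anchors = tf.random.uniform([num_anchors, 4])
  rois, roi_probabilities = tf.image.generate_bounding_box_proposals(
      scores, bbox_deltas, image_info, anchors)
  ```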
5918 """
5919 return gen_image_ops.generate_bounding_box_proposals(
5920 scores=scores,
5921 bbox_deltas=bbox_deltas,
5922 image_info=image_info,
5923 anchors=anchors,
5924 nms_threshold=nms_threshold,
5925 pre_nms_topn=pre_nms_topn,
5926 min_size=min_size,
5927 post_nms_topn=post_nms_topn,
5928 name=name)