Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/image_ops_impl.py: 24%

1268 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Implementation of image ops.""" 

16 

17import functools 

18import numpy as np 

19 

20from tensorflow.python.eager import context 

21from tensorflow.python.eager import def_function 

22from tensorflow.python.framework import config 

23from tensorflow.python.framework import constant_op 

24from tensorflow.python.framework import dtypes 

25from tensorflow.python.framework import ops 

26from tensorflow.python.framework import random_seed 

27from tensorflow.python.framework import tensor_shape 

28from tensorflow.python.framework import tensor_util 

29from tensorflow.python.ops import array_ops 

30from tensorflow.python.ops import array_ops_stack 

31from tensorflow.python.ops import check_ops 

32from tensorflow.python.ops import cond as tf_cond 

33from tensorflow.python.ops import control_flow_assert 

34from tensorflow.python.ops import control_flow_case 

35from tensorflow.python.ops import control_flow_ops 

36from tensorflow.python.ops import gen_image_ops 

37from tensorflow.python.ops import math_ops 

38from tensorflow.python.ops import nn 

39from tensorflow.python.ops import nn_ops 

40from tensorflow.python.ops import random_ops 

41from tensorflow.python.ops import sort_ops 

42from tensorflow.python.ops import stateless_random_ops 

43from tensorflow.python.ops import string_ops 

44from tensorflow.python.ops import variables 

45from tensorflow.python.ops import while_loop 

46from tensorflow.python.util import deprecation 

47from tensorflow.python.util import dispatch 

48from tensorflow.python.util.tf_export import tf_export 

49 

50ops.NotDifferentiable('RandomCrop') 

51# TODO(b/31222613): This op may be differentiable, and there may be 

52# latent bugs here. 

53ops.NotDifferentiable('HSVToRGB') 

54ops.NotDifferentiable('DrawBoundingBoxes') 

55ops.NotDifferentiable('SampleDistortedBoundingBox') 

56ops.NotDifferentiable('SampleDistortedBoundingBoxV2') 

57# TODO(bsteiner): Implement the gradient function for extract_glimpse 

58# TODO(b/31222613): This op may be differentiable, and there may be 

59# latent bugs here. 

60ops.NotDifferentiable('ExtractGlimpse') 

61ops.NotDifferentiable('NonMaxSuppression') 

62ops.NotDifferentiable('NonMaxSuppressionV2') 

63ops.NotDifferentiable('NonMaxSuppressionWithOverlaps') 

64ops.NotDifferentiable('GenerateBoundingBoxProposals') 

65 

66 

67# pylint: disable=invalid-name 

68def _assert(cond, ex_type, msg): 

69 """A polymorphic assert, works with tensors and boolean expressions. 

70 

71 If `cond` is not a tensor, behave like an ordinary assert statement, except 

72 that a empty list is returned. If `cond` is a tensor, return a list 

73 containing a single TensorFlow assert op. 

74 

75 Args: 

76 cond: Something evaluates to a boolean value. May be a tensor. 

77 ex_type: The exception class to use. 

78 msg: The error message. 

79 

80 Returns: 

81 A list, containing at most one assert op. 

82 """ 

83 if _is_tensor(cond): 

84 return [control_flow_assert.Assert(cond, [msg])] 

85 else: 

86 if not cond: 

87 raise ex_type(msg) 

88 else: 

89 return [] 

90 

91 

92def _is_tensor(x): 

93 """Returns `True` if `x` is a symbolic tensor-like object. 

94 

95 Args: 

96 x: A python object to check. 

97 

98 Returns: 

99 `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`. 

100 """ 

101 return isinstance(x, (ops.Tensor, variables.Variable)) 

102 

103 

104def _ImageDimensions(image, rank): 

105 """Returns the dimensions of an image tensor. 

106 

107 Args: 

108 image: A rank-D Tensor. For 3-D of shape: `[height, width, channels]`. 

109 rank: The expected rank of the image 

110 

111 Returns: 

112 A list of corresponding to the dimensions of the 

113 input image. Dimensions that are statically known are python integers, 

114 otherwise, they are integer scalar tensors. 

115 """ 

116 if image.get_shape().is_fully_defined(): 

117 return image.get_shape().as_list() 

118 else: 

119 static_shape = image.get_shape().with_rank(rank).as_list() 

120 dynamic_shape = array_ops_stack.unstack(array_ops.shape(image), rank) 

121 return [ 

122 s if s is not None else d for s, d in zip(static_shape, dynamic_shape) 

123 ] 

124 

125 

126def _Check3DImage(image, require_static=True): 

127 """Assert that we are working with a properly shaped image. 

128 

129 Args: 

130 image: 3-D Tensor of shape [height, width, channels] 

131 require_static: If `True`, requires that all dimensions of `image` are known 

132 and non-zero. 

133 

134 Raises: 

135 ValueError: if `image.shape` is not a 3-vector. 

136 

137 Returns: 

138 An empty list, if `image` has fully defined dimensions. Otherwise, a list 

139 containing an assert op is returned. 

140 """ 

141 try: 

142 image_shape = image.get_shape().with_rank(3) 

143 except ValueError: 

144 raise ValueError("'image' (shape %s) must be three-dimensional." % 

145 image.shape) 

146 if require_static and not image_shape.is_fully_defined(): 

147 raise ValueError("'image' (shape %s) must be fully defined." % image_shape) 

148 if any(x == 0 for x in image_shape): 

149 raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape) 

150 if not image_shape.is_fully_defined(): 

151 return [ 

152 check_ops.assert_positive( 

153 array_ops.shape(image), 

154 ["all dims of 'image.shape' " 

155 'must be > 0.']) 

156 ] 

157 else: 

158 return [] 

159 

160 

161def _Assert3DImage(image): 

162 """Assert that we are working with a properly shaped image. 

163 

164 Performs the check statically if possible (i.e. if the shape 

165 is statically known). Otherwise adds a control dependency 

166 to an assert op that checks the dynamic shape. 

167 

168 Args: 

169 image: 3-D Tensor of shape [height, width, channels] 

170 

171 Raises: 

172 ValueError: if `image.shape` is not a 3-vector. 

173 

174 Returns: 

175 If the shape of `image` could be verified statically, `image` is 

176 returned unchanged, otherwise there will be a control dependency 

177 added that asserts the correct dynamic shape. 

178 """ 

179 return control_flow_ops.with_dependencies( 

180 _Check3DImage(image, require_static=False), image) 

181 

182 

183def _AssertAtLeast3DImage(image): 

184 """Assert that we are working with a properly shaped image. 

185 

186 Performs the check statically if possible (i.e. if the shape 

187 is statically known). Otherwise adds a control dependency 

188 to an assert op that checks the dynamic shape. 

189 

190 Args: 

191 image: >= 3-D Tensor of size [*, height, width, depth] 

192 

193 Raises: 

194 ValueError: if image.shape is not a [>= 3] vector. 

195 

196 Returns: 

197 If the shape of `image` could be verified statically, `image` is 

198 returned unchanged, otherwise there will be a control dependency 

199 added that asserts the correct dynamic shape. 

200 """ 

201 return control_flow_ops.with_dependencies( 

202 _CheckAtLeast3DImage(image, require_static=False), image) 

203 

204 

205def _CheckAtLeast3DImage(image, require_static=True): 

206 """Assert that we are working with a properly shaped image. 

207 

208 Args: 

209 image: >= 3-D Tensor of size [*, height, width, depth] 

210 require_static: If `True`, requires that all dimensions of `image` are known 

211 and non-zero. 

212 

213 Raises: 

214 ValueError: if image.shape is not a [>= 3] vector. 

215 

216 Returns: 

217 An empty list, if `image` has fully defined dimensions. Otherwise, a list 

218 containing an assert op is returned. 

219 """ 

220 try: 

221 if image.get_shape().ndims is None: 

222 image_shape = image.get_shape().with_rank(3) 

223 else: 

224 image_shape = image.get_shape().with_rank_at_least(3) 

225 except ValueError: 

226 raise ValueError("'image' (shape %s) must be at least three-dimensional." % 

227 image.shape) 

228 if require_static and not image_shape.is_fully_defined(): 

229 raise ValueError('\'image\' must be fully defined.') 

230 if any(x == 0 for x in image_shape[-3:]): 

231 raise ValueError('inner 3 dims of \'image.shape\' must be > 0: %s' % 

232 image_shape) 

233 if not image_shape[-3:].is_fully_defined(): 

234 return [ 

235 check_ops.assert_positive( 

236 array_ops.shape(image)[-3:], 

237 ["inner 3 dims of 'image.shape' " 

238 'must be > 0.']), 

239 check_ops.assert_greater_equal( 

240 array_ops.rank(image), 

241 3, 

242 message="'image' must be at least three-dimensional.") 

243 ] 

244 else: 

245 return [] 

246 

247 

248def _AssertGrayscaleImage(image): 

249 """Assert that we are working with a properly shaped grayscale image. 

250 

251 Performs the check statically if possible (i.e. if the shape 

252 is statically known). Otherwise adds a control dependency 

253 to an assert op that checks the dynamic shape. 

254 

255 Args: 

256 image: >= 2-D Tensor of size [*, 1] 

257 

258 Raises: 

259 ValueError: if image.shape is not a [>= 2] vector or if 

260 last dimension is not size 1. 

261 

262 Returns: 

263 If the shape of `image` could be verified statically, `image` is 

264 returned unchanged, otherwise there will be a control dependency 

265 added that asserts the correct dynamic shape. 

266 """ 

267 return control_flow_ops.with_dependencies( 

268 _CheckGrayscaleImage(image, require_static=False), image) 

269 

270 

271def _CheckGrayscaleImage(image, require_static=True): 

272 """Assert that we are working with properly shaped grayscale image. 

273 

274 Args: 

275 image: >= 2-D Tensor of size [*, 1] 

276 require_static: Boolean, whether static shape is required. 

277 

278 Raises: 

279 ValueError: if image.shape is not a [>= 2] vector or if 

280 last dimension is not size 1. 

281 

282 Returns: 

283 An empty list, if `image` has fully defined dimensions. Otherwise, a list 

284 containing an assert op is returned. 

285 """ 

286 try: 

287 if image.get_shape().ndims is None: 

288 image_shape = image.get_shape().with_rank(2) 

289 else: 

290 image_shape = image.get_shape().with_rank_at_least(2) 

291 except ValueError: 

292 raise ValueError('A grayscale image (shape %s) must be at least ' 

293 'two-dimensional.' % image.shape) 

294 if require_static and not image_shape.is_fully_defined(): 

295 raise ValueError('\'image\' must be fully defined.') 

296 if image_shape.is_fully_defined(): 

297 if image_shape[-1] != 1: 

298 raise ValueError('Last dimension of a grayscale image should be size 1.') 

299 if not image_shape.is_fully_defined(): 

300 return [ 

301 check_ops.assert_equal( 

302 array_ops.shape(image)[-1], 

303 1, 

304 message='Last dimension of a grayscale image should be size 1.'), 

305 check_ops.assert_greater_equal( 

306 array_ops.rank(image), 

307 3, 

308 message='A grayscale image must be at least two-dimensional.') 

309 ] 

310 else: 

311 return [] 

312 

313 

314def fix_image_flip_shape(image, result): 

315 """Set the shape to 3 dimensional if we don't know anything else. 

316 

317 Args: 

318 image: original image size 

319 result: flipped or transformed image 

320 

321 Returns: 

322 An image whose shape is at least (None, None, None). 

323 """ 

324 

325 image_shape = image.get_shape() 

326 if image_shape == tensor_shape.unknown_shape(): 

327 result.set_shape([None, None, None]) 

328 else: 

329 result.set_shape(image_shape) 

330 return result 

331 

332 

333@tf_export('image.random_flip_up_down') 

334@dispatch.add_dispatch_support 

335def random_flip_up_down(image, seed=None): 

336 """Randomly flips an image vertically (upside down). 

337 

338 With a 1 in 2 chance, outputs the contents of `image` flipped along the first 

339 dimension, which is `height`. Otherwise, output the image as-is. 

340 When passing a batch of images, each image will be randomly flipped 

341 independent of other images. 

342 

343 Example usage: 

344 

345 >>> image = np.array([[[1], [2]], [[3], [4]]]) 

346 >>> tf.image.random_flip_up_down(image, 3).numpy().tolist() 

347 [[[3], [4]], [[1], [2]]] 

348 

349 Randomly flip multiple images. 

350 

351 >>> images = np.array( 

352 ... [ 

353 ... [[[1], [2]], [[3], [4]]], 

354 ... [[[5], [6]], [[7], [8]]] 

355 ... ]) 

356 >>> tf.image.random_flip_up_down(images, 4).numpy().tolist() 

357 [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]] 

358 

359 For producing deterministic results given a `seed` value, use 

360 `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param 

361 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 

362 same results given the same seed independent of how many times the function is 

363 called, and independent of global seed settings (e.g. tf.random.set_seed). 

364 

365 Args: 

366 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

367 of shape `[height, width, channels]`. 

368 seed: A Python integer. Used to create a random seed. See 

369 `tf.compat.v1.set_random_seed` for behavior. 

370 

371 Returns: 

372 A tensor of the same type and shape as `image`. 

373 Raises: 

374 ValueError: if the shape of `image` not supported. 

375 """ 

376 random_func = functools.partial(random_ops.random_uniform, seed=seed) 

377 return _random_flip(image, 0, random_func, 'random_flip_up_down') 

378 

379 

380@tf_export('image.random_flip_left_right') 

381@dispatch.add_dispatch_support 

382def random_flip_left_right(image, seed=None): 

383 """Randomly flip an image horizontally (left to right). 

384 

385 With a 1 in 2 chance, outputs the contents of `image` flipped along the 

386 second dimension, which is `width`. Otherwise output the image as-is. 

387 When passing a batch of images, each image will be randomly flipped 

388 independent of other images. 

389 

390 Example usage: 

391 

392 >>> image = np.array([[[1], [2]], [[3], [4]]]) 

393 >>> tf.image.random_flip_left_right(image, 5).numpy().tolist() 

394 [[[2], [1]], [[4], [3]]] 

395 

396 Randomly flip multiple images. 

397 

398 >>> images = np.array( 

399 ... [ 

400 ... [[[1], [2]], [[3], [4]]], 

401 ... [[[5], [6]], [[7], [8]]] 

402 ... ]) 

403 >>> tf.image.random_flip_left_right(images, 6).numpy().tolist() 

404 [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]] 

405 

406 For producing deterministic results given a `seed` value, use 

407 `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param 

408 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 

409 same results given the same seed independent of how many times the function is 

410 called, and independent of global seed settings (e.g. tf.random.set_seed). 

411 

412 Args: 

413 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

414 of shape `[height, width, channels]`. 

415 seed: A Python integer. Used to create a random seed. See 

416 `tf.compat.v1.set_random_seed` for behavior. 

417 

418 Returns: 

419 A tensor of the same type and shape as `image`. 

420 

421 Raises: 

422 ValueError: if the shape of `image` not supported. 

423 """ 

424 random_func = functools.partial(random_ops.random_uniform, seed=seed) 

425 return _random_flip(image, 1, random_func, 'random_flip_left_right') 

426 

427 

428@tf_export('image.stateless_random_flip_left_right', v1=[]) 

429@dispatch.add_dispatch_support 

430def stateless_random_flip_left_right(image, seed): 

431 """Randomly flip an image horizontally (left to right) deterministically. 

432 

433 Guarantees the same results given the same `seed` independent of how many 

434 times the function is called, and independent of global seed settings (e.g. 

435 `tf.random.set_seed`). 

436 

437 Example usage: 

438 

439 >>> image = np.array([[[1], [2]], [[3], [4]]]) 

440 >>> seed = (2, 3) 

441 >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist() 

442 [[[2], [1]], [[4], [3]]] 

443 

444 Args: 

445 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

446 of shape `[height, width, channels]`. 

447 seed: A shape [2] Tensor, the seed to the random number generator. Must have 

448 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 

449 

450 Returns: 

451 A tensor of the same type and shape as `image`. 

452 """ 

453 random_func = functools.partial( 

454 stateless_random_ops.stateless_random_uniform, seed=seed) 

455 return _random_flip( 

456 image, 1, random_func, 'stateless_random_flip_left_right') 

457 

458 

459@tf_export('image.stateless_random_flip_up_down', v1=[]) 

460@dispatch.add_dispatch_support 

461def stateless_random_flip_up_down(image, seed): 

462 """Randomly flip an image vertically (upside down) deterministically. 

463 

464 Guarantees the same results given the same `seed` independent of how many 

465 times the function is called, and independent of global seed settings (e.g. 

466 `tf.random.set_seed`). 

467 

468 Example usage: 

469 

470 >>> image = np.array([[[1], [2]], [[3], [4]]]) 

471 >>> seed = (2, 3) 

472 >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist() 

473 [[[3], [4]], [[1], [2]]] 

474 

475 Args: 

476 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

477 of shape `[height, width, channels]`. 

478 seed: A shape [2] Tensor, the seed to the random number generator. Must have 

479 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 

480 

481 Returns: 

482 A tensor of the same type and shape as `image`. 

483 """ 

484 random_func = functools.partial( 

485 stateless_random_ops.stateless_random_uniform, seed=seed) 

486 return _random_flip( 

487 image, 0, random_func, 'stateless_random_flip_up_down') 

488 

489 

490def _random_flip(image, flip_index, random_func, scope_name): 

491 """Randomly (50% chance) flip an image along axis `flip_index`. 

492 

493 Args: 

494 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

495 of shape `[height, width, channels]`. 

496 flip_index: Dimension along which to flip the image. 

497 Vertical is 0, Horizontal is 1. 

498 random_func: partial function for calling either stateful or stateless 

499 random ops with `seed` parameter specified. 

500 scope_name: Name of the scope in which the ops are added. 

501 

502 Returns: 

503 A tensor of the same type and shape as `image`. 

504 

505 Raises: 

506 ValueError: if the shape of `image` not supported. 

507 """ 

508 with ops.name_scope(None, scope_name, [image]) as scope: 

509 image = ops.convert_to_tensor(image, name='image') 

510 image = _AssertAtLeast3DImage(image) 

511 shape = image.get_shape() 

512 

513 def f_rank3(): 

514 uniform_random = random_func(shape=[], minval=0, maxval=1.0) 

515 mirror_cond = math_ops.less(uniform_random, .5) 

516 result = tf_cond.cond( 

517 mirror_cond, 

518 lambda: array_ops.reverse(image, [flip_index]), 

519 lambda: image, 

520 name=scope) 

521 return fix_image_flip_shape(image, result) 

522 

523 def f_rank4(): 

524 batch_size = array_ops.shape(image)[0] 

525 uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0) 

526 flips = math_ops.round( 

527 array_ops.reshape(uniform_random, [batch_size, 1, 1, 1])) 

528 flips = math_ops.cast(flips, image.dtype) 

529 flipped_input = array_ops.reverse(image, [flip_index + 1]) 

530 return flips * flipped_input + (1 - flips) * image 

531 

532 if shape.ndims is None: 

533 rank = array_ops.rank(image) 

534 return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 

535 if shape.ndims == 3: 

536 return f_rank3() 

537 elif shape.ndims == 4: 

538 return f_rank4() 

539 else: 

540 raise ValueError( 

541 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 

542 

543 

544@tf_export('image.flip_left_right') 

545@dispatch.add_dispatch_support 

546def flip_left_right(image): 

547 """Flip an image horizontally (left to right). 

548 

549 Outputs the contents of `image` flipped along the width dimension. 

550 

551 See also `tf.reverse`. 

552 

553 Usage Example: 

554 

555 >>> x = [[[1.0, 2.0, 3.0], 

556 ... [4.0, 5.0, 6.0]], 

557 ... [[7.0, 8.0, 9.0], 

558 ... [10.0, 11.0, 12.0]]] 

559 >>> tf.image.flip_left_right(x) 

560 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

561 array([[[ 4., 5., 6.], 

562 [ 1., 2., 3.]], 

563 [[10., 11., 12.], 

564 [ 7., 8., 9.]]], dtype=float32)> 

565 

566 Args: 

567 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

568 of shape `[height, width, channels]`. 

569 

570 Returns: 

571 A tensor of the same type and shape as `image`. 

572 

573 Raises: 

574 ValueError: if the shape of `image` not supported. 

575 """ 

576 return _flip(image, 1, 'flip_left_right') 

577 

578 

579@tf_export('image.flip_up_down') 

580@dispatch.add_dispatch_support 

581def flip_up_down(image): 

582 """Flip an image vertically (upside down). 

583 

584 Outputs the contents of `image` flipped along the height dimension. 

585 

586 See also `reverse()`. 

587 

588 Usage Example: 

589 

590 >>> x = [[[1.0, 2.0, 3.0], 

591 ... [4.0, 5.0, 6.0]], 

592 ... [[7.0, 8.0, 9.0], 

593 ... [10.0, 11.0, 12.0]]] 

594 >>> tf.image.flip_up_down(x) 

595 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

596 array([[[ 7., 8., 9.], 

597 [10., 11., 12.]], 

598 [[ 1., 2., 3.], 

599 [ 4., 5., 6.]]], dtype=float32)> 

600 

601 Args: 

602 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

603 of shape `[height, width, channels]`. 

604 

605 Returns: 

606 A `Tensor` of the same type and shape as `image`. 

607 

608 Raises: 

609 ValueError: if the shape of `image` not supported. 

610 """ 

611 return _flip(image, 0, 'flip_up_down') 

612 

613 

614def _flip(image, flip_index, scope_name): 

615 """Flip an image either horizontally or vertically. 

616 

617 Outputs the contents of `image` flipped along the dimension `flip_index`. 

618 

619 See also `reverse()`. 

620 

621 Args: 

622 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

623 of shape `[height, width, channels]`. 

624 flip_index: 0 For vertical, 1 for horizontal. 

625 scope_name: string, scope name. 

626 

627 Returns: 

628 A `Tensor` of the same type and shape as `image`. 

629 

630 Raises: 

631 ValueError: if the shape of `image` not supported. 

632 """ 

633 with ops.name_scope(None, scope_name, [image]): 

634 image = ops.convert_to_tensor(image, name='image') 

635 image = _AssertAtLeast3DImage(image) 

636 shape = image.get_shape() 

637 

638 def f_rank3(): 

639 return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index])) 

640 

641 def f_rank4(): 

642 return array_ops.reverse(image, [flip_index + 1]) 

643 

644 if shape.ndims is None: 

645 rank = array_ops.rank(image) 

646 return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 

647 elif shape.ndims == 3: 

648 return f_rank3() 

649 elif shape.ndims == 4: 

650 return f_rank4() 

651 else: 

652 raise ValueError( 

653 '\'image\' (shape %s)must have either 3 or 4 dimensions.' % shape) 

654 

655 

656@tf_export('image.rot90') 

657@dispatch.add_dispatch_support 

658def rot90(image, k=1, name=None): 

659 """Rotate image(s) by 90 degrees. 

660 

661 

662 For example: 

663 

664 >>> a=tf.constant([[[1],[2]], 

665 ... [[3],[4]]]) 

666 >>> # rotating `a` counter clockwise by 90 degrees 

667 >>> a_rot=tf.image.rot90(a) 

668 >>> print(a_rot[...,0].numpy()) 

669 [[2 4] 

670 [1 3]] 

671 >>> # rotating `a` counter clockwise by 270 degrees 

672 >>> a_rot=tf.image.rot90(a, k=3) 

673 >>> print(a_rot[...,0].numpy()) 

674 [[3 1] 

675 [4 2]] 

676 >>> # rotating `a` clockwise by 180 degrees 

677 >>> a_rot=tf.image.rot90(a, k=-2) 

678 >>> print(a_rot[...,0].numpy()) 

679 [[4 3] 

680 [2 1]] 

681 

682 Args: 

683 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

684 of shape `[height, width, channels]`. 

685 k: A scalar integer tensor. The number of times the image(s) are rotated by 

686 90 degrees. 

687 name: A name for this operation (optional). 

688 

689 Returns: 

690 A rotated tensor of the same type and shape as `image`. 

691 

692 Raises: 

693 ValueError: if the shape of `image` not supported. 

694 """ 

695 with ops.name_scope(name, 'rot90', [image, k]) as scope: 

696 image = ops.convert_to_tensor(image, name='image') 

697 image = _AssertAtLeast3DImage(image) 

698 k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k') 

699 k.get_shape().assert_has_rank(0) 

700 k = math_ops.mod(k, 4) 

701 

702 shape = image.get_shape() 

703 if shape.ndims is None: 

704 rank = array_ops.rank(image) 

705 

706 def f_rank3(): 

707 return _rot90_3D(image, k, scope) 

708 

709 def f_rank4(): 

710 return _rot90_4D(image, k, scope) 

711 

712 return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 

713 elif shape.ndims == 3: 

714 return _rot90_3D(image, k, scope) 

715 elif shape.ndims == 4: 

716 return _rot90_4D(image, k, scope) 

717 else: 

718 raise ValueError( 

719 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 

720 

721 

722def _rot90_3D(image, k, name_scope): 

723 """Rotate image counter-clockwise by 90 degrees `k` times. 

724 

725 Args: 

726 image: 3-D Tensor of shape `[height, width, channels]`. 

727 k: A scalar integer. The number of times the image is rotated by 90 degrees. 

728 name_scope: A valid TensorFlow name scope. 

729 

730 Returns: 

731 A 3-D tensor of the same type and shape as `image`. 

732 

733 """ 

734 

735 def _rot90(): 

736 return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2]) 

737 

738 def _rot180(): 

739 return array_ops.reverse_v2(image, [0, 1]) 

740 

741 def _rot270(): 

742 return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1]) 

743 

744 cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), 

745 (math_ops.equal(k, 3), _rot270)] 

746 

747 result = control_flow_case.case( 

748 cases, default=lambda: image, exclusive=True, name=name_scope) 

749 result.set_shape([None, None, image.get_shape()[2]]) 

750 return result 

751 

752 

753def _rot90_4D(images, k, name_scope): 

754 """Rotate batch of images counter-clockwise by 90 degrees `k` times. 

755 

756 Args: 

757 images: 4-D Tensor of shape `[height, width, channels]`. 

758 k: A scalar integer. The number of times the images are rotated by 90 

759 degrees. 

760 name_scope: A valid TensorFlow name scope. 

761 

762 Returns: 

763 A 4-D `Tensor` of the same type and shape as `images`. 

764 """ 

765 

766 def _rot90(): 

767 return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3]) 

768 

769 def _rot180(): 

770 return array_ops.reverse_v2(images, [1, 2]) 

771 

772 def _rot270(): 

773 return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2]) 

774 

775 cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), 

776 (math_ops.equal(k, 3), _rot270)] 

777 

778 result = control_flow_case.case( 

779 cases, default=lambda: images, exclusive=True, name=name_scope) 

780 shape = result.get_shape() 

781 result.set_shape([shape[0], None, None, shape[3]]) 

782 return result 

783 

784 

785@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image']) 

786@dispatch.add_dispatch_support 

787def transpose(image, name=None): 

788 """Transpose image(s) by swapping the height and width dimension. 

789 

790 Usage Example: 

791 

792 >>> x = [[[1.0, 2.0, 3.0], 

793 ... [4.0, 5.0, 6.0]], 

794 ... [[7.0, 8.0, 9.0], 

795 ... [10.0, 11.0, 12.0]]] 

796 >>> tf.image.transpose(x) 

797 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

798 array([[[ 1., 2., 3.], 

799 [ 7., 8., 9.]], 

800 [[ 4., 5., 6.], 

801 [10., 11., 12.]]], dtype=float32)> 

802 

803 Args: 

804 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

805 of shape `[height, width, channels]`. 

806 name: A name for this operation (optional). 

807 

808 Returns: 

809 If `image` was 4-D, a 4-D float Tensor of shape 

810 `[batch, width, height, channels]` 

811 If `image` was 3-D, a 3-D float Tensor of shape 

812 `[width, height, channels]` 

813 

814 Raises: 

815 ValueError: if the shape of `image` not supported. 

816 

817 Usage Example: 

818 

819 >>> image = [[[1, 2], [3, 4]], 

820 ... [[5, 6], [7, 8]], 

821 ... [[9, 10], [11, 12]]] 

822 >>> image = tf.constant(image) 

823 >>> tf.image.transpose(image) 

824 <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy= 

825 array([[[ 1, 2], 

826 [ 5, 6], 

827 [ 9, 10]], 

828 [[ 3, 4], 

829 [ 7, 8], 

830 [11, 12]]], dtype=int32)> 

831 """ 

832 with ops.name_scope(name, 'transpose', [image]): 

833 image = ops.convert_to_tensor(image, name='image') 

834 image = _AssertAtLeast3DImage(image) 

835 shape = image.get_shape() 

836 if shape.ndims is None: 

837 rank = array_ops.rank(image) 

838 

839 def f_rank3(): 

840 return array_ops.transpose(image, [1, 0, 2], name=name) 

841 

842 def f_rank4(): 

843 return array_ops.transpose(image, [0, 2, 1, 3], name=name) 

844 

845 return tf_cond.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 

846 elif shape.ndims == 3: 

847 return array_ops.transpose(image, [1, 0, 2], name=name) 

848 elif shape.ndims == 4: 

849 return array_ops.transpose(image, [0, 2, 1, 3], name=name) 

850 else: 

851 raise ValueError( 

852 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 

853 

854 

855@tf_export('image.central_crop') 

856@dispatch.add_dispatch_support 

857def central_crop(image, central_fraction): 

858 """Crop the central region of the image(s). 

859 

860 Remove the outer parts of an image but retain the central region of the image 

861 along each dimension. If we specify `central_fraction = 0.5`, this function 

862 returns the region marked with "X" in the below diagram. The larger the value 

863 of `central_fraction`, the larger the dimension of the region to be cropped 

864 and retained. 

865 

866 -------- 

867 | | 

868 | XXXX | 

869 | XXXX | 

870 | | where "X" is the central 50% of the image. 

871 -------- 

872 

873 This function works on either a single image (`image` is a 3-D Tensor), or a 

874 batch of images (`image` is a 4-D Tensor). 

875 

876 Usage Example: 

877 

878 >>> x = [[[1.0, 2.0, 3.0], 

879 ... [4.0, 5.0, 6.0], 

880 ... [7.0, 8.0, 9.0], 

881 ... [10.0, 11.0, 12.0]], 

882 ... [[13.0, 14.0, 15.0], 

883 ... [16.0, 17.0, 18.0], 

884 ... [19.0, 20.0, 21.0], 

885 ... [22.0, 23.0, 24.0]], 

886 ... [[25.0, 26.0, 27.0], 

887 ... [28.0, 29.0, 30.0], 

888 ... [31.0, 32.0, 33.0], 

889 ... [34.0, 35.0, 36.0]], 

890 ... [[37.0, 38.0, 39.0], 

891 ... [40.0, 41.0, 42.0], 

892 ... [43.0, 44.0, 45.0], 

893 ... [46.0, 47.0, 48.0]]] 

894 >>> tf.image.central_crop(x, 0.5) 

895 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

896 array([[[16., 17., 18.], 

897 [19., 20., 21.]], 

898 [[28., 29., 30.], 

899 [31., 32., 33.]]], dtype=float32)> 

900 

901 Args: 

902 image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D 

903 Tensor of shape [batch_size, height, width, depth]. 

904 central_fraction: float (0, 1], fraction of size to crop 

905 

906 Raises: 

907 ValueError: if central_crop_fraction is not within (0, 1]. 

908 

909 Returns: 

910 3-D / 4-D float Tensor, as per the input. 

911 """ 

912 with ops.name_scope(None, 'central_crop', [image]): 

913 image = ops.convert_to_tensor(image, name='image') 

914 central_fraction_static = tensor_util.constant_value(central_fraction) 

915 if central_fraction_static is not None: 

916 if central_fraction_static <= 0.0 or central_fraction_static > 1.0: 

917 raise ValueError('central_fraction must be within (0, 1]') 

918 if central_fraction_static == 1.0: 

919 return image 

920 else: 

921 assert_ops = _assert( 

922 math_ops.logical_or(central_fraction > 0.0, central_fraction <= 1.0), 

923 ValueError, 'central_fraction must be within (0, 1]') 

924 image = control_flow_ops.with_dependencies(assert_ops, image) 

925 

926 _AssertAtLeast3DImage(image) 

927 rank = image.get_shape().ndims 

928 if rank != 3 and rank != 4: 

929 raise ValueError('`image` should either be a Tensor with rank = 3 or ' 

930 'rank = 4. Had rank = {}.'.format(rank)) 

931 

932 # Helper method to return the `idx`-th dimension of `tensor`, along with 

933 # a boolean signifying if the dimension is dynamic. 

934 def _get_dim(tensor, idx): 

935 static_shape = tensor.get_shape().dims[idx].value 

936 if static_shape is not None: 

937 return static_shape, False 

938 return array_ops.shape(tensor)[idx], True 

939 

940 # Get the height, width, depth (and batch size, if the image is a 4-D 

941 # tensor). 

942 if rank == 3: 

943 img_h, dynamic_h = _get_dim(image, 0) 

944 img_w, dynamic_w = _get_dim(image, 1) 

945 img_d = image.get_shape()[2] 

946 else: 

947 img_bs = image.get_shape()[0] 

948 img_h, dynamic_h = _get_dim(image, 1) 

949 img_w, dynamic_w = _get_dim(image, 2) 

950 img_d = image.get_shape()[3] 

951 

952 dynamic_h = dynamic_h or (central_fraction_static is None) 

953 dynamic_w = dynamic_w or (central_fraction_static is None) 

954 

955 # Compute the bounding boxes for the crop. The type and value of the 

956 # bounding boxes depend on the `image` tensor's rank and whether / not the 

957 # dimensions are statically defined. 

958 if dynamic_h: 

959 img_hd = math_ops.cast(img_h, dtypes.float64) 

960 bbox_h_start = math_ops.cast( 

961 (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) / 

962 2, dtypes.int32) 

963 else: 

964 img_hd = float(img_h) 

965 bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2) 

966 

967 if dynamic_w: 

968 img_wd = math_ops.cast(img_w, dtypes.float64) 

969 bbox_w_start = math_ops.cast( 

970 (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) / 

971 2, dtypes.int32) 

972 else: 

973 img_wd = float(img_w) 

974 bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2) 

975 

976 bbox_h_size = img_h - bbox_h_start * 2 

977 bbox_w_size = img_w - bbox_w_start * 2 

978 

979 if rank == 3: 

980 bbox_begin = array_ops_stack.stack([bbox_h_start, bbox_w_start, 0]) 

981 bbox_size = array_ops_stack.stack([bbox_h_size, bbox_w_size, -1]) 

982 else: 

983 bbox_begin = array_ops_stack.stack([0, bbox_h_start, bbox_w_start, 0]) 

984 bbox_size = array_ops_stack.stack([-1, bbox_h_size, bbox_w_size, -1]) 

985 

986 image = array_ops.slice(image, bbox_begin, bbox_size) 

987 

988 # Reshape the `image` tensor to the desired size. 

989 if rank == 3: 

990 image.set_shape([ 

991 None if dynamic_h else bbox_h_size, 

992 None if dynamic_w else bbox_w_size, img_d 

993 ]) 

994 else: 

995 image.set_shape([ 

996 img_bs, None if dynamic_h else bbox_h_size, 

997 None if dynamic_w else bbox_w_size, img_d 

998 ]) 

999 return image 

1000 

1001 

1002@tf_export('image.pad_to_bounding_box') 

1003@dispatch.add_dispatch_support 

1004def pad_to_bounding_box(image, offset_height, offset_width, target_height, 

1005 target_width): 

1006 """Pad `image` with zeros to the specified `height` and `width`. 

1007 

1008 Adds `offset_height` rows of zeros on top, `offset_width` columns of 

1009 zeros on the left, and then pads the image on the bottom and right 

1010 with zeros until it has dimensions `target_height`, `target_width`. 

1011 

1012 This op does nothing if `offset_*` is zero and the image already has size 

1013 `target_height` by `target_width`. 

1014 

1015 Usage Example: 

1016 

1017 >>> x = [[[1., 2., 3.], 

1018 ... [4., 5., 6.]], 

1019 ... [[7., 8., 9.], 

1020 ... [10., 11., 12.]]] 

1021 >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4) 

1022 >>> padded_image 

1023 <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy= 

1024 array([[[ 0., 0., 0.], 

1025 [ 0., 0., 0.], 

1026 [ 0., 0., 0.], 

1027 [ 0., 0., 0.]], 

1028 [[ 0., 0., 0.], 

1029 [ 1., 2., 3.], 

1030 [ 4., 5., 6.], 

1031 [ 0., 0., 0.]], 

1032 [[ 0., 0., 0.], 

1033 [ 7., 8., 9.], 

1034 [10., 11., 12.], 

1035 [ 0., 0., 0.]], 

1036 [[ 0., 0., 0.], 

1037 [ 0., 0., 0.], 

1038 [ 0., 0., 0.], 

1039 [ 0., 0., 0.]]], dtype=float32)> 

1040 

1041 Args: 

1042 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

1043 of shape `[height, width, channels]`. 

1044 offset_height: Number of rows of zeros to add on top. 

1045 offset_width: Number of columns of zeros to add on the left. 

1046 target_height: Height of output image. 

1047 target_width: Width of output image. 

1048 

1049 Returns: 

1050 If `image` was 4-D, a 4-D float Tensor of shape 

1051 `[batch, target_height, target_width, channels]` 

1052 If `image` was 3-D, a 3-D float Tensor of shape 

1053 `[target_height, target_width, channels]` 

1054 

1055 Raises: 

1056 ValueError: If the shape of `image` is incompatible with the `offset_*` or 

1057 `target_*` arguments, or either `offset_height` or `offset_width` is 

1058 negative. 

1059 """ 

1060 return pad_to_bounding_box_internal( 

1061 image, 

1062 offset_height, 

1063 offset_width, 

1064 target_height, 

1065 target_width, 

1066 check_dims=True) 

1067 

1068 

1069# TODO(b/190099338) Remove this internal method and remap call sites to call 

1070# image_ops.pad_to_bounding_box when asserts are no longer serialized. See also 

1071# b/204377079#comment6 for more context. 

1072def pad_to_bounding_box_internal(image, offset_height, offset_width, 

1073 target_height, target_width, check_dims): 

1074 """Pad `image` with zeros to the specified `height` and `width`. 

1075 

1076 Adds `offset_height` rows of zeros on top, `offset_width` columns of 

1077 zeros on the left, and then pads the image on the bottom and right 

1078 with zeros until it has dimensions `target_height`, `target_width`. 

1079 

1080 This op does nothing if `offset_*` is zero and the image already has size 

1081 `target_height` by `target_width`. 

1082 

1083 Args: 

1084 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

1085 of shape `[height, width, channels]`. 

1086 offset_height: Number of rows of zeros to add on top. 

1087 offset_width: Number of columns of zeros to add on the left. 

1088 target_height: Height of output image. 

1089 target_width: Width of output image. 

1090 check_dims: If True, assert that dimensions are non-negative and in range. 

1091 In multi-GPU distributed settings, assertions can cause program slowdown. 

1092 Setting this parameter to `False` avoids this, resulting in faster speed 

1093 in some situations, with the tradeoff being that some error checking is 

1094 not happening. 

1095 

1096 Returns: 

1097 If `image` was 4-D, a 4-D float Tensor of shape 

1098 `[batch, target_height, target_width, channels]` 

1099 If `image` was 3-D, a 3-D float Tensor of shape 

1100 `[target_height, target_width, channels]` 

1101 

1102 Raises: 

1103 ValueError: If the shape of `image` is incompatible with the `offset_*` or 

1104 `target_*` arguments, or either `offset_height` or `offset_width` is 

1105 negative. Not raised if `check_dims` is `False`. 

1106 """ 

1107 with ops.name_scope(None, 'pad_to_bounding_box', [image]): 

1108 image = ops.convert_to_tensor(image, name='image') 

1109 

1110 is_batch = True 

1111 image_shape = image.get_shape() 

1112 if image_shape.ndims == 3: 

1113 is_batch = False 

1114 image = array_ops.expand_dims(image, 0) 

1115 elif image_shape.ndims is None: 

1116 is_batch = False 

1117 image = array_ops.expand_dims(image, 0) 

1118 image.set_shape([None] * 4) 

1119 elif image_shape.ndims != 4: 

1120 raise ValueError( 

1121 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 

1122 image_shape) 

1123 

1124 batch, height, width, depth = _ImageDimensions(image, rank=4) 

1125 

1126 after_padding_width = target_width - offset_width - width 

1127 

1128 after_padding_height = target_height - offset_height - height 

1129 

1130 if check_dims: 

1131 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 

1132 assert_ops += _assert(offset_height >= 0, ValueError, 

1133 'offset_height must be >= 0') 

1134 assert_ops += _assert(offset_width >= 0, ValueError, 

1135 'offset_width must be >= 0') 

1136 assert_ops += _assert(after_padding_width >= 0, ValueError, 

1137 'width must be <= target - offset') 

1138 assert_ops += _assert(after_padding_height >= 0, ValueError, 

1139 'height must be <= target - offset') 

1140 image = control_flow_ops.with_dependencies(assert_ops, image) 

1141 

1142 # Do not pad on the depth dimensions. 

1143 paddings = array_ops.reshape( 

1144 array_ops_stack.stack([ 

1145 0, 0, offset_height, after_padding_height, offset_width, 

1146 after_padding_width, 0, 0 

1147 ]), [4, 2]) 

1148 padded = array_ops.pad(image, paddings) 

1149 

1150 padded_shape = [ 

1151 None if _is_tensor(i) else i 

1152 for i in [batch, target_height, target_width, depth] 

1153 ] 

1154 padded.set_shape(padded_shape) 

1155 

1156 if not is_batch: 

1157 padded = array_ops.squeeze(padded, axis=[0]) 

1158 

1159 return padded 

1160 

1161 

1162@tf_export('image.crop_to_bounding_box') 

1163@dispatch.add_dispatch_support 

1164def crop_to_bounding_box(image, offset_height, offset_width, target_height, 

1165 target_width): 

1166 """Crops an `image` to a specified bounding box. 

1167 

1168 This op cuts a rectangular bounding box out of `image`. The top-left corner 

1169 of the bounding box is at `offset_height, offset_width` in `image`, and the 

1170 lower-right corner is at 

1171 `offset_height + target_height, offset_width + target_width`. 

1172 

1173 Example Usage: 

1174 

1175 >>> image = tf.constant(np.arange(1, 28, dtype=np.float32), shape=[3, 3, 3]) 

1176 >>> image[:,:,0] # print the first channel of the 3-D tensor 

1177 <tf.Tensor: shape=(3, 3), dtype=float32, numpy= 

1178 array([[ 1., 4., 7.], 

1179 [10., 13., 16.], 

1180 [19., 22., 25.]], dtype=float32)> 

1181 >>> cropped_image = tf.image.crop_to_bounding_box(image, 0, 0, 2, 2) 

1182 >>> cropped_image[:,:,0] # print the first channel of the cropped 3-D tensor 

1183 <tf.Tensor: shape=(2, 2), dtype=float32, numpy= 

1184 array([[ 1., 4.], 

1185 [10., 13.]], dtype=float32)> 

1186 

1187 Args: 

1188 image: 4-D `Tensor` of shape `[batch, height, width, channels]` or 3-D 

1189 `Tensor` of shape `[height, width, channels]`. 

1190 offset_height: Vertical coordinate of the top-left corner of the bounding 

1191 box in `image`. 

1192 offset_width: Horizontal coordinate of the top-left corner of the bounding 

1193 box in `image`. 

1194 target_height: Height of the bounding box. 

1195 target_width: Width of the bounding box. 

1196 

1197 Returns: 

1198 If `image` was 4-D, a 4-D `Tensor` of shape 

1199 `[batch, target_height, target_width, channels]`. 

1200 If `image` was 3-D, a 3-D `Tensor` of shape 

1201 `[target_height, target_width, channels]`. 

1202 It has the same dtype with `image`. 

1203 

1204 Raises: 

1205 ValueError: `image` is not a 3-D or 4-D `Tensor`. 

1206 ValueError: `offset_width < 0` or `offset_height < 0`. 

1207 ValueError: `target_width <= 0` or `target_height <= 0`. 

1208 ValueError: `width < offset_width + target_width` or 

1209 `height < offset_height + target_height`. 

1210 """ 

1211 with ops.name_scope(None, 'crop_to_bounding_box', [image]): 

1212 image = ops.convert_to_tensor(image, name='image') 

1213 

1214 is_batch = True 

1215 image_shape = image.get_shape() 

1216 if image_shape.ndims == 3: 

1217 is_batch = False 

1218 image = array_ops.expand_dims(image, 0) 

1219 elif image_shape.ndims is None: 

1220 is_batch = False 

1221 image = array_ops.expand_dims(image, 0) 

1222 image.set_shape([None] * 4) 

1223 elif image_shape.ndims != 4: 

1224 raise ValueError( 

1225 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 

1226 image_shape) 

1227 

1228 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 

1229 

1230 batch, height, width, depth = _ImageDimensions(image, rank=4) 

1231 

1232 assert_ops += _assert(offset_width >= 0, ValueError, 

1233 'offset_width must be >= 0.') 

1234 assert_ops += _assert(offset_height >= 0, ValueError, 

1235 'offset_height must be >= 0.') 

1236 assert_ops += _assert(target_width > 0, ValueError, 

1237 'target_width must be > 0.') 

1238 assert_ops += _assert(target_height > 0, ValueError, 

1239 'target_height must be > 0.') 

1240 assert_ops += _assert(width >= (target_width + offset_width), ValueError, 

1241 'width must be >= target + offset.') 

1242 assert_ops += _assert(height >= (target_height + offset_height), ValueError, 

1243 'height must be >= target + offset.') 

1244 image = control_flow_ops.with_dependencies(assert_ops, image) 

1245 

1246 cropped = array_ops.slice( 

1247 image, 

1248 array_ops_stack.stack([0, offset_height, offset_width, 0]), 

1249 array_ops_stack.stack([ 

1250 array_ops.shape(image)[0], 

1251 target_height, 

1252 target_width, 

1253 array_ops.shape(image)[3]])) 

1254 

1255 cropped_shape = [ 

1256 None if _is_tensor(i) else i 

1257 for i in [batch, target_height, target_width, depth] 

1258 ] 

1259 cropped.set_shape(cropped_shape) 

1260 

1261 if not is_batch: 

1262 cropped = array_ops.squeeze(cropped, axis=[0]) 

1263 

1264 return cropped 

1265 

1266 

1267@tf_export( 

1268 'image.resize_with_crop_or_pad', 

1269 v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad']) 

1270@dispatch.add_dispatch_support 

1271def resize_image_with_crop_or_pad(image, target_height, target_width): 

1272 """Crops and/or pads an image to a target width and height. 

1273 

1274 Resizes an image to a target width and height by either centrally 

1275 cropping the image or padding it evenly with zeros. 

1276 

1277 If `width` or `height` is greater than the specified `target_width` or 

1278 `target_height` respectively, this op centrally crops along that dimension. 

1279 

1280 For example: 

1281 

1282 >>> image = np.arange(75).reshape(5, 5, 3) # create 3-D image input 

1283 >>> image[:,:,0] # print first channel just for demo purposes 

1284 array([[ 0, 3, 6, 9, 12], 

1285 [15, 18, 21, 24, 27], 

1286 [30, 33, 36, 39, 42], 

1287 [45, 48, 51, 54, 57], 

1288 [60, 63, 66, 69, 72]]) 

1289 >>> image = tf.image.resize_with_crop_or_pad(image, 3, 3) # crop 

1290 >>> # print first channel for demo purposes; centrally cropped output 

1291 >>> image[:,:,0] 

1292 <tf.Tensor: shape=(3, 3), dtype=int64, numpy= 

1293 array([[18, 21, 24], 

1294 [33, 36, 39], 

1295 [48, 51, 54]])> 

1296 

1297 If `width` or `height` is smaller than the specified `target_width` or 

1298 `target_height` respectively, this op centrally pads with 0 along that 

1299 dimension. 

1300 

1301 For example: 

1302 

1303 >>> image = np.arange(1, 28).reshape(3, 3, 3) # create 3-D image input 

1304 >>> image[:,:,0] # print first channel just for demo purposes 

1305 array([[ 1, 4, 7], 

1306 [10, 13, 16], 

1307 [19, 22, 25]]) 

1308 >>> image = tf.image.resize_with_crop_or_pad(image, 5, 5) # pad 

1309 >>> # print first channel for demo purposes; we should see 0 paddings 

1310 >>> image[:,:,0] 

1311 <tf.Tensor: shape=(5, 5), dtype=int64, numpy= 

1312 array([[ 0, 0, 0, 0, 0], 

1313 [ 0, 1, 4, 7, 0], 

1314 [ 0, 10, 13, 16, 0], 

1315 [ 0, 19, 22, 25, 0], 

1316 [ 0, 0, 0, 0, 0]])> 

1317 

1318 Args: 

1319 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

1320 of shape `[height, width, channels]`. 

1321 target_height: Target height. 

1322 target_width: Target width. 

1323 

1324 Raises: 

1325 ValueError: if `target_height` or `target_width` are zero or negative. 

1326 

1327 Returns: 

1328 Cropped and/or padded image. 

1329 If `images` was 4-D, a 4-D float Tensor of shape 

1330 `[batch, new_height, new_width, channels]`. 

1331 If `images` was 3-D, a 3-D float Tensor of shape 

1332 `[new_height, new_width, channels]`. 

1333 """ 

1334 with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]): 

1335 image = ops.convert_to_tensor(image, name='image') 

1336 image_shape = image.get_shape() 

1337 is_batch = True 

1338 if image_shape.ndims == 3: 

1339 is_batch = False 

1340 image = array_ops.expand_dims(image, 0) 

1341 elif image_shape.ndims is None: 

1342 is_batch = False 

1343 image = array_ops.expand_dims(image, 0) 

1344 image.set_shape([None] * 4) 

1345 elif image_shape.ndims != 4: 

1346 raise ValueError( 

1347 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 

1348 image_shape) 

1349 

1350 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 

1351 assert_ops += _assert(target_width > 0, ValueError, 

1352 'target_width must be > 0.') 

1353 assert_ops += _assert(target_height > 0, ValueError, 

1354 'target_height must be > 0.') 

1355 

1356 image = control_flow_ops.with_dependencies(assert_ops, image) 

1357 # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks. 

1358 # Make sure our checks come first, so that error messages are clearer. 

1359 if _is_tensor(target_height): 

1360 target_height = control_flow_ops.with_dependencies( 

1361 assert_ops, target_height) 

1362 if _is_tensor(target_width): 

1363 target_width = control_flow_ops.with_dependencies(assert_ops, 

1364 target_width) 

1365 

1366 def max_(x, y): 

1367 if _is_tensor(x) or _is_tensor(y): 

1368 return math_ops.maximum(x, y) 

1369 else: 

1370 return max(x, y) 

1371 

1372 def min_(x, y): 

1373 if _is_tensor(x) or _is_tensor(y): 

1374 return math_ops.minimum(x, y) 

1375 else: 

1376 return min(x, y) 

1377 

1378 def equal_(x, y): 

1379 if _is_tensor(x) or _is_tensor(y): 

1380 return math_ops.equal(x, y) 

1381 else: 

1382 return x == y 

1383 

1384 _, height, width, _ = _ImageDimensions(image, rank=4) 

1385 width_diff = target_width - width 

1386 offset_crop_width = max_(-width_diff // 2, 0) 

1387 offset_pad_width = max_(width_diff // 2, 0) 

1388 

1389 height_diff = target_height - height 

1390 offset_crop_height = max_(-height_diff // 2, 0) 

1391 offset_pad_height = max_(height_diff // 2, 0) 

1392 

1393 # Maybe crop if needed. 

1394 cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width, 

1395 min_(target_height, height), 

1396 min_(target_width, width)) 

1397 

1398 # Maybe pad if needed. 

1399 resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width, 

1400 target_height, target_width) 

1401 

1402 # In theory all the checks below are redundant. 

1403 if resized.get_shape().ndims is None: 

1404 raise ValueError('resized contains no shape.') 

1405 

1406 _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4) 

1407 

1408 assert_ops = [] 

1409 assert_ops += _assert( 

1410 equal_(resized_height, target_height), ValueError, 

1411 'resized height is not correct.') 

1412 assert_ops += _assert( 

1413 equal_(resized_width, target_width), ValueError, 

1414 'resized width is not correct.') 

1415 

1416 resized = control_flow_ops.with_dependencies(assert_ops, resized) 

1417 

1418 if not is_batch: 

1419 resized = array_ops.squeeze(resized, axis=[0]) 

1420 

1421 return resized 

1422 

1423 

1424@tf_export(v1=['image.ResizeMethod']) 

1425class ResizeMethodV1: 

1426 """See `v1.image.resize` for details.""" 

1427 BILINEAR = 0 

1428 NEAREST_NEIGHBOR = 1 

1429 BICUBIC = 2 

1430 AREA = 3 

1431 

1432 

1433@tf_export('image.ResizeMethod', v1=[]) 

1434class ResizeMethod: 

1435 """See `tf.image.resize` for details.""" 

1436 BILINEAR = 'bilinear' 

1437 NEAREST_NEIGHBOR = 'nearest' 

1438 BICUBIC = 'bicubic' 

1439 AREA = 'area' 

1440 LANCZOS3 = 'lanczos3' 

1441 LANCZOS5 = 'lanczos5' 

1442 GAUSSIAN = 'gaussian' 

1443 MITCHELLCUBIC = 'mitchellcubic' 

1444 

1445 

1446def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name, 

1447 skip_resize_if_same): 

1448 """Core functionality for v1 and v2 resize functions.""" 

1449 with ops.name_scope(name, 'resize', [images, size]): 

1450 images = ops.convert_to_tensor(images, name='images') 

1451 if images.get_shape().ndims is None: 

1452 raise ValueError('\'images\' contains no shape.') 

1453 # TODO(shlens): Migrate this functionality to the underlying Op's. 

1454 is_batch = True 

1455 if images.get_shape().ndims == 3: 

1456 is_batch = False 

1457 images = array_ops.expand_dims(images, 0) 

1458 elif images.get_shape().ndims != 4: 

1459 raise ValueError('\'images\' must have either 3 or 4 dimensions.') 

1460 

1461 _, height, width, _ = images.get_shape().as_list() 

1462 

1463 try: 

1464 size = ops.convert_to_tensor(size, dtypes.int32, name='size') 

1465 except (TypeError, ValueError): 

1466 raise ValueError('\'size\' must be a 1-D int32 Tensor') 

1467 if not size.get_shape().is_compatible_with([2]): 

1468 raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: ' 

1469 'new_height, new_width') 

1470 

1471 if preserve_aspect_ratio: 

1472 # Get the current shapes of the image, even if dynamic. 

1473 _, current_height, current_width, _ = _ImageDimensions(images, rank=4) 

1474 

1475 # do the computation to find the right scale and height/width. 

1476 scale_factor_height = ( 

1477 math_ops.cast(size[0], dtypes.float32) / 

1478 math_ops.cast(current_height, dtypes.float32)) 

1479 scale_factor_width = ( 

1480 math_ops.cast(size[1], dtypes.float32) / 

1481 math_ops.cast(current_width, dtypes.float32)) 

1482 scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width) 

1483 scaled_height_const = math_ops.cast( 

1484 math_ops.round(scale_factor * 

1485 math_ops.cast(current_height, dtypes.float32)), 

1486 dtypes.int32) 

1487 scaled_width_const = math_ops.cast( 

1488 math_ops.round(scale_factor * 

1489 math_ops.cast(current_width, dtypes.float32)), 

1490 dtypes.int32) 

1491 

1492 # NOTE: Reset the size and other constants used later. 

1493 size = ops.convert_to_tensor([scaled_height_const, scaled_width_const], 

1494 dtypes.int32, 

1495 name='size') 

1496 

1497 size_const_as_shape = tensor_util.constant_value_as_shape(size) 

1498 new_height_const = tensor_shape.dimension_at_index(size_const_as_shape, 

1499 0).value 

1500 new_width_const = tensor_shape.dimension_at_index(size_const_as_shape, 

1501 1).value 

1502 

1503 # If we can determine that the height and width will be unmodified by this 

1504 # transformation, we avoid performing the resize. 

1505 if skip_resize_if_same and all( 

1506 x is not None 

1507 for x in [new_width_const, width, new_height_const, height]) and ( 

1508 width == new_width_const and height == new_height_const): 

1509 if not is_batch: 

1510 images = array_ops.squeeze(images, axis=[0]) 

1511 return images 

1512 

1513 images = resizer_fn(images, size) 

1514 

1515 # NOTE(mrry): The shape functions for the resize ops cannot unpack 

1516 # the packed values in `new_size`, so set the shape here. 

1517 images.set_shape([None, new_height_const, new_width_const, None]) 

1518 

1519 if not is_batch: 

1520 images = array_ops.squeeze(images, axis=[0]) 

1521 return images 

1522 

1523 

1524@tf_export(v1=['image.resize_images', 'image.resize']) 

1525@dispatch.add_dispatch_support 

1526def resize_images(images, 

1527 size, 

1528 method=ResizeMethodV1.BILINEAR, 

1529 align_corners=False, 

1530 preserve_aspect_ratio=False, 

1531 name=None): 

1532 """Resize `images` to `size` using the specified `method`. 

1533 

1534 Resized images will be distorted if their original aspect ratio is not 

1535 the same as `size`. To avoid distortions see 

1536 `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`. 

1537 

1538 The `method` can be one of: 

1539 

1540 * <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.]( 

1541 https://en.wikipedia.org/wiki/Bilinear_interpolation) 

1542 * <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [ 

1543 Nearest neighbor interpolation.]( 

1544 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) 

1545 * <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.]( 

1546 https://en.wikipedia.org/wiki/Bicubic_interpolation) 

1547 * <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation. 

1548 

1549 The return value has the same type as `images` if `method` is 

1550 `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type 

1551 as `images` if the size of `images` can be statically determined to be the 

1552 same as `size`, because `images` is returned in this case. Otherwise, the 

1553 return value has type `float32`. 

1554 

1555 Args: 

1556 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

1557 of shape `[height, width, channels]`. 

1558 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new 

1559 size for the images. 

1560 method: ResizeMethod. Defaults to `tf.image.ResizeMethod.BILINEAR`. 

1561 align_corners: bool. If True, the centers of the 4 corner pixels of the 

1562 input and output tensors are aligned, preserving the values at the corner 

1563 pixels. Defaults to `False`. 

1564 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set, 

1565 then `images` will be resized to a size that fits in `size` while 

1566 preserving the aspect ratio of the original image. Scales up the image if 

1567 `size` is bigger than the current size of the `image`. Defaults to False. 

1568 name: A name for this operation (optional). 

1569 

1570 Raises: 

1571 ValueError: if the shape of `images` is incompatible with the 

1572 shape arguments to this function 

1573 ValueError: if `size` has invalid shape or type. 

1574 ValueError: if an unsupported resize method is specified. 

1575 

1576 Returns: 

1577 If `images` was 4-D, a 4-D float Tensor of shape 

1578 `[batch, new_height, new_width, channels]`. 

1579 If `images` was 3-D, a 3-D float Tensor of shape 

1580 `[new_height, new_width, channels]`. 

1581 """ 

1582 

1583 def resize_fn(images_t, new_size): 

1584 """Legacy resize core function, passed to _resize_images_common.""" 

1585 if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR: 

1586 return gen_image_ops.resize_bilinear( 

1587 images_t, new_size, align_corners=align_corners) 

1588 elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or 

1589 method == ResizeMethod.NEAREST_NEIGHBOR): 

1590 return gen_image_ops.resize_nearest_neighbor( 

1591 images_t, new_size, align_corners=align_corners) 

1592 elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC: 

1593 return gen_image_ops.resize_bicubic( 

1594 images_t, new_size, align_corners=align_corners) 

1595 elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA: 

1596 return gen_image_ops.resize_area( 

1597 images_t, new_size, align_corners=align_corners) 

1598 else: 

1599 raise ValueError('Resize method is not implemented: {}'.format(method)) 

1600 

1601 return _resize_images_common( 

1602 images, 

1603 resize_fn, 

1604 size, 

1605 preserve_aspect_ratio=preserve_aspect_ratio, 

1606 name=name, 

1607 skip_resize_if_same=True) 

1608 

1609 

1610@tf_export('image.resize', v1=[]) 

1611@dispatch.add_dispatch_support 

1612def resize_images_v2(images, 

1613 size, 

1614 method=ResizeMethod.BILINEAR, 

1615 preserve_aspect_ratio=False, 

1616 antialias=False, 

1617 name=None): 

1618 """Resize `images` to `size` using the specified `method`. 

1619 

1620 Resized images will be distorted if their original aspect ratio is not 

1621 the same as `size`. To avoid distortions see 

1622 `tf.image.resize_with_pad`. 

1623 

1624 >>> image = tf.constant([ 

1625 ... [1,0,0,0,0], 

1626 ... [0,1,0,0,0], 

1627 ... [0,0,1,0,0], 

1628 ... [0,0,0,1,0], 

1629 ... [0,0,0,0,1], 

1630 ... ]) 

1631 >>> # Add "batch" and "channels" dimensions 

1632 >>> image = image[tf.newaxis, ..., tf.newaxis] 

1633 >>> image.shape.as_list() # [batch, height, width, channels] 

1634 [1, 5, 5, 1] 

1635 >>> tf.image.resize(image, [3,5])[0,...,0].numpy() 

1636 array([[0.6666667, 0.3333333, 0. , 0. , 0. ], 

1637 [0. , 0. , 1. , 0. , 0. ], 

1638 [0. , 0. , 0. , 0.3333335, 0.6666665]], 

1639 dtype=float32) 

1640 

1641 It works equally well with a single image instead of a batch of images: 

1642 

1643 >>> tf.image.resize(image[0], [3,5]).shape.as_list() 

1644 [3, 5, 1] 

1645 

1646 When `antialias` is true, the sampling filter will anti-alias the input image 

1647 as well as interpolate. When downsampling an image with [anti-aliasing]( 

1648 https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter 

1649 kernel is scaled in order to properly anti-alias the input image signal. 

1650 `antialias` has no effect when upsampling an image: 

1651 

1652 >>> a = tf.image.resize(image, [5,10]) 

1653 >>> b = tf.image.resize(image, [5,10], antialias=True) 

1654 >>> tf.reduce_max(abs(a - b)).numpy() 

1655 0.0 

1656 

1657 The `method` argument expects an item from the `image.ResizeMethod` enum, or 

1658 the string equivalent. The options are: 

1659 

1660 * <b>`bilinear`</b>: [Bilinear interpolation.]( 

1661 https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is 

1662 true, becomes a hat/tent filter function with radius 1 when downsampling. 

1663 * <b>`lanczos3`</b>: [Lanczos kernel]( 

1664 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3. 

1665 High-quality practical filter but may have some ringing, especially on 

1666 synthetic images. 

1667 * <b>`lanczos5`</b>: [Lanczos kernel] ( 

1668 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5. 

1669 Very-high-quality filter but may have stronger ringing. 

1670 * <b>`bicubic`</b>: [Cubic interpolant]( 

1671 https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to 

1672 Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel, 

1673 particularly when upsampling. 

1674 * <b>`gaussian`</b>: [Gaussian kernel]( 

1675 https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3, 

1676 sigma = 1.5 / 3.0. 

1677 * <b>`nearest`</b>: [Nearest neighbor interpolation.]( 

1678 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) 

1679 `antialias` has no effect when used with nearest neighbor interpolation. 

1680 * <b>`area`</b>: Anti-aliased resampling with area interpolation. 

1681 `antialias` has no effect when used with area interpolation; it 

1682 always anti-aliases. 

1683 * <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter. 

1684 For synthetic images (especially those lacking proper prefiltering), less 

1685 ringing than Keys cubic kernel but less sharp. 

1686 

1687 Note: Near image edges the filtering kernel may be partially outside the 

1688 image boundaries. For these pixels, only input pixels inside the image will be 

1689 included in the filter sum, and the output value will be appropriately 

1690 normalized. 

1691 

1692 The return value has type `float32`, unless the `method` is 

1693 `ResizeMethod.NEAREST_NEIGHBOR`, then the return dtype is the dtype 

1694 of `images`: 

1695 

1696 >>> nn = tf.image.resize(image, [5,7], method='nearest') 

1697 >>> nn[0,...,0].numpy() 

1698 array([[1, 0, 0, 0, 0, 0, 0], 

1699 [0, 1, 1, 0, 0, 0, 0], 

1700 [0, 0, 0, 1, 0, 0, 0], 

1701 [0, 0, 0, 0, 1, 1, 0], 

1702 [0, 0, 0, 0, 0, 0, 1]], dtype=int32) 

1703 

1704 With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size` 

1705 is the maximum for each dimension: 

1706 

1707 >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True) 

1708 >>> max_10_20.shape.as_list() 

1709 [1, 10, 10, 1] 

1710 

1711 Args: 

1712 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

1713 of shape `[height, width, channels]`. 

1714 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new 

1715 size for the images. 

1716 method: An `image.ResizeMethod`, or string equivalent. Defaults to 

1717 `bilinear`. 

1718 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set, 

1719 then `images` will be resized to a size that fits in `size` while 

1720 preserving the aspect ratio of the original image. Scales up the image if 

1721 `size` is bigger than the current size of the `image`. Defaults to False. 

1722 antialias: Whether to use an anti-aliasing filter when downsampling an 

1723 image. 

1724 name: A name for this operation (optional). 

1725 

1726 Raises: 

1727 ValueError: if the shape of `images` is incompatible with the 

1728 shape arguments to this function 

1729 ValueError: if `size` has an invalid shape or type. 

1730 ValueError: if an unsupported resize method is specified. 

1731 

1732 Returns: 

1733 If `images` was 4-D, a 4-D float Tensor of shape 

1734 `[batch, new_height, new_width, channels]`. 

1735 If `images` was 3-D, a 3-D float Tensor of shape 

1736 `[new_height, new_width, channels]`. 

1737 """ 

1738 

1739 def resize_fn(images_t, new_size): 

1740 """Resize core function, passed to _resize_images_common.""" 

1741 scale_and_translate_methods = [ 

1742 ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN, 

1743 ResizeMethod.MITCHELLCUBIC 

1744 ] 

1745 

1746 def resize_with_scale_and_translate(method): 

1747 scale = ( 

1748 math_ops.cast(new_size, dtype=dtypes.float32) / 

1749 math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32)) 

1750 return gen_image_ops.scale_and_translate( 

1751 images_t, 

1752 new_size, 

1753 scale, 

1754 array_ops.zeros([2]), 

1755 kernel_type=method, 

1756 antialias=antialias) 

1757 

1758 if method == ResizeMethod.BILINEAR: 

1759 if antialias: 

1760 return resize_with_scale_and_translate('triangle') 

1761 else: 

1762 return gen_image_ops.resize_bilinear( 

1763 images_t, new_size, half_pixel_centers=True) 

1764 elif method == ResizeMethod.NEAREST_NEIGHBOR: 

1765 return gen_image_ops.resize_nearest_neighbor( 

1766 images_t, new_size, half_pixel_centers=True) 

1767 elif method == ResizeMethod.BICUBIC: 

1768 if antialias: 

1769 return resize_with_scale_and_translate('keyscubic') 

1770 else: 

1771 return gen_image_ops.resize_bicubic( 

1772 images_t, new_size, half_pixel_centers=True) 

1773 elif method == ResizeMethod.AREA: 

1774 return gen_image_ops.resize_area(images_t, new_size) 

1775 elif method in scale_and_translate_methods: 

1776 return resize_with_scale_and_translate(method) 

1777 else: 

1778 raise ValueError('Resize method is not implemented: {}'.format(method)) 

1779 

1780 return _resize_images_common( 

1781 images, 

1782 resize_fn, 

1783 size, 

1784 preserve_aspect_ratio=preserve_aspect_ratio, 

1785 name=name, 

1786 skip_resize_if_same=False) 

1787 

1788 

1789def _resize_image_with_pad_common(image, target_height, target_width, 

1790 resize_fn): 

1791 """Core functionality for v1 and v2 resize_image_with_pad functions.""" 

1792 with ops.name_scope(None, 'resize_image_with_pad', [image]): 

1793 image = ops.convert_to_tensor(image, name='image') 

1794 image_shape = image.get_shape() 

1795 is_batch = True 

1796 if image_shape.ndims == 3: 

1797 is_batch = False 

1798 image = array_ops.expand_dims(image, 0) 

1799 elif image_shape.ndims is None: 

1800 is_batch = False 

1801 image = array_ops.expand_dims(image, 0) 

1802 image.set_shape([None] * 4) 

1803 elif image_shape.ndims != 4: 

1804 raise ValueError( 

1805 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 

1806 image_shape) 

1807 

1808 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 

1809 assert_ops += _assert(target_width > 0, ValueError, 

1810 'target_width must be > 0.') 

1811 assert_ops += _assert(target_height > 0, ValueError, 

1812 'target_height must be > 0.') 

1813 

1814 image = control_flow_ops.with_dependencies(assert_ops, image) 

1815 

1816 def max_(x, y): 

1817 if _is_tensor(x) or _is_tensor(y): 

1818 return math_ops.maximum(x, y) 

1819 else: 

1820 return max(x, y) 

1821 

1822 _, height, width, _ = _ImageDimensions(image, rank=4) 

1823 

1824 # convert values to float, to ease divisions 

1825 f_height = math_ops.cast(height, dtype=dtypes.float32) 

1826 f_width = math_ops.cast(width, dtype=dtypes.float32) 

1827 f_target_height = math_ops.cast(target_height, dtype=dtypes.float32) 

1828 f_target_width = math_ops.cast(target_width, dtype=dtypes.float32) 

1829 

1830 # Find the ratio by which the image must be adjusted 

1831 # to fit within the target 

1832 ratio = max_(f_width / f_target_width, f_height / f_target_height) 

1833 resized_height_float = f_height / ratio 

1834 resized_width_float = f_width / ratio 

1835 resized_height = math_ops.cast( 

1836 math_ops.floor(resized_height_float), dtype=dtypes.int32) 

1837 resized_width = math_ops.cast( 

1838 math_ops.floor(resized_width_float), dtype=dtypes.int32) 

1839 

1840 padding_height = (f_target_height - resized_height_float) / 2 

1841 padding_width = (f_target_width - resized_width_float) / 2 

1842 f_padding_height = math_ops.floor(padding_height) 

1843 f_padding_width = math_ops.floor(padding_width) 

1844 p_height = max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32)) 

1845 p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32)) 

1846 

1847 # Resize first, then pad to meet requested dimensions 

1848 resized = resize_fn(image, [resized_height, resized_width]) 

1849 

1850 padded = pad_to_bounding_box(resized, p_height, p_width, target_height, 

1851 target_width) 

1852 

1853 if padded.get_shape().ndims is None: 

1854 raise ValueError('padded contains no shape.') 

1855 

1856 _ImageDimensions(padded, rank=4) 

1857 

1858 if not is_batch: 

1859 padded = array_ops.squeeze(padded, axis=[0]) 

1860 

1861 return padded 

1862 

1863 

1864@tf_export(v1=['image.resize_image_with_pad']) 

1865@dispatch.add_dispatch_support 

1866def resize_image_with_pad_v1(image, 

1867 target_height, 

1868 target_width, 

1869 method=ResizeMethodV1.BILINEAR, 

1870 align_corners=False): 

1871 """Resizes and pads an image to a target width and height. 

1872 

1873 Resizes an image to a target width and height by keeping 

1874 the aspect ratio the same without distortion. If the target 

1875 dimensions don't match the image dimensions, the image 

1876 is resized and then padded with zeroes to match requested 

1877 dimensions. 

1878 

1879 Args: 

1880 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

1881 of shape `[height, width, channels]`. 

1882 target_height: Target height. 

1883 target_width: Target width. 

1884 method: Method to use for resizing image. See `resize_images()` 

1885 align_corners: bool. If True, the centers of the 4 corner pixels of the 

1886 input and output tensors are aligned, preserving the values at the corner 

1887 pixels. Defaults to `False`. 

1888 

1889 Raises: 

1890 ValueError: if `target_height` or `target_width` are zero or negative. 

1891 

1892 Returns: 

1893 Resized and padded image. 

1894 If `images` was 4-D, a 4-D float Tensor of shape 

1895 `[batch, new_height, new_width, channels]`. 

1896 If `images` was 3-D, a 3-D float Tensor of shape 

1897 `[new_height, new_width, channels]`. 

1898 """ 

1899 

1900 def _resize_fn(im, new_size): 

1901 return resize_images(im, new_size, method, align_corners=align_corners) 

1902 

1903 return _resize_image_with_pad_common(image, target_height, target_width, 

1904 _resize_fn) 

1905 

1906 

1907@tf_export('image.resize_with_pad', v1=[]) 

1908@dispatch.add_dispatch_support 

1909def resize_image_with_pad_v2(image, 

1910 target_height, 

1911 target_width, 

1912 method=ResizeMethod.BILINEAR, 

1913 antialias=False): 

1914 """Resizes and pads an image to a target width and height. 

1915 

1916 Resizes an image to a target width and height by keeping 

1917 the aspect ratio the same without distortion. If the target 

1918 dimensions don't match the image dimensions, the image 

1919 is resized and then padded with zeroes to match requested 

1920 dimensions. 

1921 

1922 Args: 

1923 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

1924 of shape `[height, width, channels]`. 

1925 target_height: Target height. 

1926 target_width: Target width. 

1927 method: Method to use for resizing image. See `image.resize()` 

1928 antialias: Whether to use anti-aliasing when resizing. See 'image.resize()'. 

1929 

1930 Raises: 

1931 ValueError: if `target_height` or `target_width` are zero or negative. 

1932 

1933 Returns: 

1934 Resized and padded image. 

1935 If `images` was 4-D, a 4-D float Tensor of shape 

1936 `[batch, new_height, new_width, channels]`. 

1937 If `images` was 3-D, a 3-D float Tensor of shape 

1938 `[new_height, new_width, channels]`. 

1939 """ 

1940 

1941 def _resize_fn(im, new_size): 

1942 return resize_images_v2(im, new_size, method, antialias=antialias) 

1943 

1944 return _resize_image_with_pad_common(image, target_height, target_width, 

1945 _resize_fn) 

1946 

1947 

1948@tf_export('image.per_image_standardization') 

1949@dispatch.add_dispatch_support 

1950def per_image_standardization(image): 

1951 """Linearly scales each image in `image` to have mean 0 and variance 1. 

1952 

1953 For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`, 

1954 where 

1955 

1956 - `mean` is the average of all values in `x` 

1957 - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to 

1958 protect against division by 0 when handling uniform images 

1959 - `N` is the number of elements in `x` 

1960 - `stddev` is the standard deviation of all values in `x` 

1961 

1962 Example Usage: 

1963 

1964 >>> image = tf.constant(np.arange(1, 13, dtype=np.int32), shape=[2, 2, 3]) 

1965 >>> image # 3-D tensor 

1966 <tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy= 

1967 array([[[ 1, 2, 3], 

1968 [ 4, 5, 6]], 

1969 [[ 7, 8, 9], 

1970 [10, 11, 12]]], dtype=int32)> 

1971 >>> new_image = tf.image.per_image_standardization(image) 

1972 >>> new_image # 3-D tensor with mean ~= 0 and variance ~= 1 

1973 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

1974 array([[[-1.593255 , -1.3035723 , -1.0138896 ], 

1975 [-0.7242068 , -0.4345241 , -0.14484136]], 

1976 [[ 0.14484136, 0.4345241 , 0.7242068 ], 

1977 [ 1.0138896 , 1.3035723 , 1.593255 ]]], dtype=float32)> 

1978 

1979 Args: 

1980 image: An n-D `Tensor` with at least 3 dimensions, the last 3 of which are 

1981 the dimensions of each image. 

1982 

1983 Returns: 

1984 A `Tensor` with the same shape as `image` and its dtype is `float32`. 

1985 

1986 Raises: 

1987 ValueError: The shape of `image` has fewer than 3 dimensions. 

1988 """ 

1989 with ops.name_scope(None, 'per_image_standardization', [image]) as scope: 

1990 image = ops.convert_to_tensor(image, name='image') 

1991 image = _AssertAtLeast3DImage(image) 

1992 

1993 image = math_ops.cast(image, dtype=dtypes.float32) 

1994 num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:]) 

1995 image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) 

1996 

1997 # Apply a minimum normalization that protects us against uniform images. 

1998 stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True) 

1999 min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32)) 

2000 adjusted_stddev = math_ops.maximum(stddev, min_stddev) 

2001 

2002 image -= image_mean 

2003 image = math_ops.divide(image, adjusted_stddev, name=scope) 

2004 return image 

2005 

2006 

2007@tf_export('image.random_brightness') 

2008@dispatch.register_unary_elementwise_api 

2009@dispatch.add_dispatch_support 

2010def random_brightness(image, max_delta, seed=None): 

2011 """Adjust the brightness of images by a random factor. 

2012 

2013 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the 

2014 interval `[-max_delta, max_delta)`. 

2015 

2016 For producing deterministic results given a `seed` value, use 

2017 `tf.image.stateless_random_brightness`. Unlike using the `seed` param 

2018 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 

2019 same results given the same seed independent of how many times the function is 

2020 called, and independent of global seed settings (e.g. tf.random.set_seed). 

2021 

2022 Args: 

2023 image: An image or images to adjust. 

2024 max_delta: float, must be non-negative. 

2025 seed: A Python integer. Used to create a random seed. See 

2026 `tf.compat.v1.set_random_seed` for behavior. 

2027 

2028 Usage Example: 

2029 

2030 >>> x = [[[1.0, 2.0, 3.0], 

2031 ... [4.0, 5.0, 6.0]], 

2032 ... [[7.0, 8.0, 9.0], 

2033 ... [10.0, 11.0, 12.0]]] 

2034 >>> tf.image.random_brightness(x, 0.2) 

2035 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 

2036 

2037 Returns: 

2038 The brightness-adjusted image(s). 

2039 

2040 Raises: 

2041 ValueError: if `max_delta` is negative. 

2042 """ 

2043 if max_delta < 0: 

2044 raise ValueError('max_delta must be non-negative.') 

2045 

2046 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) 

2047 return adjust_brightness(image, delta) 

2048 

2049 

2050@tf_export('image.stateless_random_brightness', v1=[]) 

2051@dispatch.register_unary_elementwise_api 

2052@dispatch.add_dispatch_support 

2053def stateless_random_brightness(image, max_delta, seed): 

2054 """Adjust the brightness of images by a random factor deterministically. 

2055 

2056 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the 

2057 interval `[-max_delta, max_delta)`. 

2058 

2059 Guarantees the same results given the same `seed` independent of how many 

2060 times the function is called, and independent of global seed settings (e.g. 

2061 `tf.random.set_seed`). 

2062 

2063 Usage Example: 

2064 

2065 >>> x = [[[1.0, 2.0, 3.0], 

2066 ... [4.0, 5.0, 6.0]], 

2067 ... [[7.0, 8.0, 9.0], 

2068 ... [10.0, 11.0, 12.0]]] 

2069 >>> seed = (1, 2) 

2070 >>> tf.image.stateless_random_brightness(x, 0.2, seed) 

2071 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

2072 array([[[ 1.1376241, 2.1376243, 3.1376243], 

2073 [ 4.1376243, 5.1376243, 6.1376243]], 

2074 [[ 7.1376243, 8.137624 , 9.137624 ], 

2075 [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)> 

2076 

2077 Args: 

2078 image: An image or images to adjust. 

2079 max_delta: float, must be non-negative. 

2080 seed: A shape [2] Tensor, the seed to the random number generator. Must have 

2081 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 

2082 

2083 Returns: 

2084 The brightness-adjusted image(s). 

2085 

2086 Raises: 

2087 ValueError: if `max_delta` is negative. 

2088 """ 

2089 if max_delta < 0: 

2090 raise ValueError('max_delta must be non-negative.') 

2091 

2092 delta = stateless_random_ops.stateless_random_uniform( 

2093 shape=[], minval=-max_delta, maxval=max_delta, seed=seed) 

2094 return adjust_brightness(image, delta) 

2095 

2096 

2097@tf_export('image.random_contrast') 

2098@dispatch.add_dispatch_support 

2099def random_contrast(image, lower, upper, seed=None): 

2100 """Adjust the contrast of an image or images by a random factor. 

2101 

2102 Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly 

2103 picked in the interval `[lower, upper)`. 

2104 

2105 For producing deterministic results given a `seed` value, use 

2106 `tf.image.stateless_random_contrast`. Unlike using the `seed` param 

2107 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 

2108 same results given the same seed independent of how many times the function is 

2109 called, and independent of global seed settings (e.g. tf.random.set_seed). 

2110 

2111 Args: 

2112 image: An image tensor with 3 or more dimensions. 

2113 lower: float. Lower bound for the random contrast factor. 

2114 upper: float. Upper bound for the random contrast factor. 

2115 seed: A Python integer. Used to create a random seed. See 

2116 `tf.compat.v1.set_random_seed` for behavior. 

2117 

2118 Usage Example: 

2119 

2120 >>> x = [[[1.0, 2.0, 3.0], 

2121 ... [4.0, 5.0, 6.0]], 

2122 ... [[7.0, 8.0, 9.0], 

2123 ... [10.0, 11.0, 12.0]]] 

2124 >>> tf.image.random_contrast(x, 0.2, 0.5) 

2125 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 

2126 

2127 Returns: 

2128 The contrast-adjusted image(s). 

2129 

2130 Raises: 

2131 ValueError: if `upper <= lower` or if `lower < 0`. 

2132 """ 

2133 if upper <= lower: 

2134 raise ValueError('upper must be > lower.') 

2135 

2136 if lower < 0: 

2137 raise ValueError('lower must be non-negative.') 

2138 

2139 contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed) 

2140 return adjust_contrast(image, contrast_factor) 

2141 

2142 

2143@tf_export('image.stateless_random_contrast', v1=[]) 

2144@dispatch.add_dispatch_support 

2145def stateless_random_contrast(image, lower, upper, seed): 

2146 """Adjust the contrast of images by a random factor deterministically. 

2147 

2148 Guarantees the same results given the same `seed` independent of how many 

2149 times the function is called, and independent of global seed settings (e.g. 

2150 `tf.random.set_seed`). 

2151 

2152 Args: 

2153 image: An image tensor with 3 or more dimensions. 

2154 lower: float. Lower bound for the random contrast factor. 

2155 upper: float. Upper bound for the random contrast factor. 

2156 seed: A shape [2] Tensor, the seed to the random number generator. Must have 

2157 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 

2158 

2159 Usage Example: 

2160 

2161 >>> x = [[[1.0, 2.0, 3.0], 

2162 ... [4.0, 5.0, 6.0]], 

2163 ... [[7.0, 8.0, 9.0], 

2164 ... [10.0, 11.0, 12.0]]] 

2165 >>> seed = (1, 2) 

2166 >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed) 

2167 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

2168 array([[[3.4605184, 4.4605184, 5.4605184], 

2169 [4.820173 , 5.820173 , 6.820173 ]], 

2170 [[6.179827 , 7.179827 , 8.179828 ], 

2171 [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)> 

2172 

2173 Returns: 

2174 The contrast-adjusted image(s). 

2175 

2176 Raises: 

2177 ValueError: if `upper <= lower` or if `lower < 0`. 

2178 """ 

2179 if upper <= lower: 

2180 raise ValueError('upper must be > lower.') 

2181 

2182 if lower < 0: 

2183 raise ValueError('lower must be non-negative.') 

2184 

2185 contrast_factor = stateless_random_ops.stateless_random_uniform( 

2186 shape=[], minval=lower, maxval=upper, seed=seed) 

2187 return adjust_contrast(image, contrast_factor) 

2188 

2189 

2190@tf_export('image.adjust_brightness') 

2191@dispatch.register_unary_elementwise_api 

2192@dispatch.add_dispatch_support 

2193def adjust_brightness(image, delta): 

2194 """Adjust the brightness of RGB or Grayscale images. 

2195 

2196 This is a convenience method that converts RGB images to float 

2197 representation, adjusts their brightness, and then converts them back to the 

2198 original data type. If several adjustments are chained, it is advisable to 

2199 minimize the number of redundant conversions. 

2200 

2201 The value `delta` is added to all components of the tensor `image`. `image` is 

2202 converted to `float` and scaled appropriately if it is in fixed-point 

2203 representation, and `delta` is converted to the same data type. For regular 

2204 images, `delta` should be in the range `(-1,1)`, as it is added to the image 

2205 in floating point representation, where pixel values are in the `[0,1)` range. 

2206 

2207 Usage Example: 

2208 

2209 >>> x = [[[1.0, 2.0, 3.0], 

2210 ... [4.0, 5.0, 6.0]], 

2211 ... [[7.0, 8.0, 9.0], 

2212 ... [10.0, 11.0, 12.0]]] 

2213 >>> tf.image.adjust_brightness(x, delta=0.1) 

2214 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

2215 array([[[ 1.1, 2.1, 3.1], 

2216 [ 4.1, 5.1, 6.1]], 

2217 [[ 7.1, 8.1, 9.1], 

2218 [10.1, 11.1, 12.1]]], dtype=float32)> 

2219 

2220 Args: 

2221 image: RGB image or images to adjust. 

2222 delta: A scalar. Amount to add to the pixel values. 

2223 

2224 Returns: 

2225 A brightness-adjusted tensor of the same shape and type as `image`. 

2226 """ 

2227 with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name: 

2228 image = ops.convert_to_tensor(image, name='image') 

2229 # Remember original dtype to so we can convert back if needed 

2230 orig_dtype = image.dtype 

2231 

2232 if orig_dtype in [dtypes.float16, dtypes.float32]: 

2233 flt_image = image 

2234 else: 

2235 flt_image = convert_image_dtype(image, dtypes.float32) 

2236 

2237 adjusted = math_ops.add( 

2238 flt_image, math_ops.cast(delta, flt_image.dtype), name=name) 

2239 

2240 return convert_image_dtype(adjusted, orig_dtype, saturate=True) 

2241 

2242 

2243@tf_export('image.adjust_contrast') 

2244@dispatch.add_dispatch_support 

2245def adjust_contrast(images, contrast_factor): 

2246 """Adjust contrast of RGB or grayscale images. 

2247 

2248 This is a convenience method that converts RGB images to float 

2249 representation, adjusts their contrast, and then converts them back to the 

2250 original data type. If several adjustments are chained, it is advisable to 

2251 minimize the number of redundant conversions. 

2252 

2253 `images` is a tensor of at least 3 dimensions. The last 3 dimensions are 

2254 interpreted as `[height, width, channels]`. The other dimensions only 

2255 represent a collection of images, such as `[batch, height, width, channels].` 

2256 

2257 Contrast is adjusted independently for each channel of each image. 

2258 

2259 For each channel, this Op computes the mean of the image pixels in the 

2260 channel and then adjusts each component `x` of each pixel to 

2261 `(x - mean) * contrast_factor + mean`. 

2262 

2263 `contrast_factor` must be in the interval `(-inf, inf)`. 

2264 

2265 Usage Example: 

2266 

2267 >>> x = [[[1.0, 2.0, 3.0], 

2268 ... [4.0, 5.0, 6.0]], 

2269 ... [[7.0, 8.0, 9.0], 

2270 ... [10.0, 11.0, 12.0]]] 

2271 >>> tf.image.adjust_contrast(x, 2.) 

2272 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

2273 array([[[-3.5, -2.5, -1.5], 

2274 [ 2.5, 3.5, 4.5]], 

2275 [[ 8.5, 9.5, 10.5], 

2276 [14.5, 15.5, 16.5]]], dtype=float32)> 

2277 

2278 Args: 

2279 images: Images to adjust. At least 3-D. 

2280 contrast_factor: A float multiplier for adjusting contrast. 

2281 

2282 Returns: 

2283 The contrast-adjusted image or images. 

2284 """ 

2285 with ops.name_scope(None, 'adjust_contrast', 

2286 [images, contrast_factor]) as name: 

2287 images = ops.convert_to_tensor(images, name='images') 

2288 # Remember original dtype to so we can convert back if needed 

2289 orig_dtype = images.dtype 

2290 

2291 if orig_dtype in (dtypes.float16, dtypes.float32): 

2292 flt_images = images 

2293 else: 

2294 flt_images = convert_image_dtype(images, dtypes.float32) 

2295 

2296 adjusted = gen_image_ops.adjust_contrastv2( 

2297 flt_images, contrast_factor=contrast_factor, name=name) 

2298 

2299 return convert_image_dtype(adjusted, orig_dtype, saturate=True) 

2300 

2301 

2302@tf_export('image.adjust_gamma') 

2303@dispatch.register_unary_elementwise_api 

2304@dispatch.add_dispatch_support 

2305def adjust_gamma(image, gamma=1, gain=1): 

2306 """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction). 

2307 

2308 on the input image. 

2309 

2310 Also known as Power Law Transform. This function converts the 

2311 input images at first to float representation, then transforms them 

2312 pixelwise according to the equation `Out = gain * In**gamma`, 

2313 and then converts the back to the original data type. 

2314 

2315 Usage Example: 

2316 

2317 >>> x = [[[1.0, 2.0, 3.0], 

2318 ... [4.0, 5.0, 6.0]], 

2319 ... [[7.0, 8.0, 9.0], 

2320 ... [10.0, 11.0, 12.0]]] 

2321 >>> tf.image.adjust_gamma(x, 0.2) 

2322 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

2323 array([[[1. , 1.1486983, 1.2457309], 

2324 [1.319508 , 1.3797297, 1.4309691]], 

2325 [[1.4757731, 1.5157166, 1.5518456], 

2326 [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)> 

2327 

2328 Args: 

2329 image : RGB image or images to adjust. 

2330 gamma : A scalar or tensor. Non-negative real number. 

2331 gain : A scalar or tensor. The constant multiplier. 

2332 

2333 Returns: 

2334 A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`. 

2335 

2336 Raises: 

2337 ValueError: If gamma is negative. 

2338 Notes: 

2339 For gamma greater than 1, the histogram will shift towards left and 

2340 the output image will be darker than the input image. 

2341 For gamma less than 1, the histogram will shift towards right and 

2342 the output image will be brighter than the input image. 

2343 References: 

2344 [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction) 

2345 """ 

2346 

2347 with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name: 

2348 image = ops.convert_to_tensor(image, name='image') 

2349 # Remember original dtype to so we can convert back if needed 

2350 orig_dtype = image.dtype 

2351 

2352 if orig_dtype in [dtypes.float16, dtypes.float32]: 

2353 flt_image = image 

2354 else: 

2355 flt_image = convert_image_dtype(image, dtypes.float32) 

2356 

2357 assert_op = _assert(gamma >= 0, ValueError, 

2358 'Gamma should be a non-negative real number.') 

2359 if assert_op: 

2360 gamma = control_flow_ops.with_dependencies(assert_op, gamma) 

2361 

2362 # According to the definition of gamma correction. 

2363 adjusted_img = gain * flt_image**gamma 

2364 

2365 return convert_image_dtype(adjusted_img, orig_dtype, saturate=True) 

2366 

2367 

2368@tf_export('image.convert_image_dtype') 

2369@dispatch.register_unary_elementwise_api 

2370@dispatch.add_dispatch_support 

2371def convert_image_dtype(image, dtype, saturate=False, name=None): 

2372 """Convert `image` to `dtype`, scaling its values if needed. 

2373 

2374 The operation supports data types (for `image` and `dtype`) of 

2375 `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`, 

2376 `float16`, `float32`, `float64`, `bfloat16`. 

2377 

2378 Images that are represented using floating point values are expected to have 

2379 values in the range [0,1). Image data stored in integer data types are 

2380 expected to have values in the range `[0,MAX]`, where `MAX` is the largest 

2381 positive representable number for the data type. 

2382 

2383 This op converts between data types, scaling the values appropriately before 

2384 casting. 

2385 

2386 Usage Example: 

2387 

2388 >>> x = [[[1, 2, 3], [4, 5, 6]], 

2389 ... [[7, 8, 9], [10, 11, 12]]] 

2390 >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8) 

2391 >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False) 

2392 <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy= 

2393 array([[[0.00787, 0.01575, 0.02362], 

2394 [0.0315 , 0.03937, 0.04724]], 

2395 [[0.0551 , 0.063 , 0.07086], 

2396 [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)> 

2397 

2398 Converting integer types to floating point types returns normalized floating 

2399 point values in the range [0, 1); the values are normalized by the `MAX` value 

2400 of the input dtype. Consider the following two examples: 

2401 

2402 >>> a = [[[1], [2]], [[3], [4]]] 

2403 >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8) 

2404 >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32) 

2405 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 

2406 array([[[0.00787402], 

2407 [0.01574803]], 

2408 [[0.02362205], 

2409 [0.03149606]]], dtype=float32)> 

2410 

2411 >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32) 

2412 >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32) 

2413 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 

2414 array([[[4.6566129e-10], 

2415 [9.3132257e-10]], 

2416 [[1.3969839e-09], 

2417 [1.8626451e-09]]], dtype=float32)> 

2418 

2419 Despite having identical values of `a` and output dtype of `float32`, the 

2420 outputs differ due to the different input dtypes (`int8` vs. `int32`). This 

2421 is, again, because the values are normalized by the `MAX` value of the input 

2422 dtype. 

2423 

2424 Note that converting floating point values to integer type may lose precision. 

2425 In the example below, an image tensor `b` of dtype `float32` is converted to 

2426 `int8` and back to `float32`. The final output, however, is different from 

2427 the original input `b` due to precision loss. 

2428 

2429 >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]] 

2430 >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32) 

2431 >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8) 

2432 >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32) 

2433 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 

2434 array([[[0.11811024], 

2435 [0.33858266]], 

2436 [[0.5590551 ], 

2437 [0.77952754]]], dtype=float32)> 

2438 

2439 Scaling up from an integer type (input dtype) to another integer type (output 

2440 dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting 

2441 back and forth should result in no change. For example, as shown below, the 

2442 `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767) 

2443 but, when scaled back, we get the same, original values of `c`. 

2444 

2445 >>> c = [[[1], [2]], [[127], [127]]] 

2446 >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8) 

2447 >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16) 

2448 >>> print(c_int16) 

2449 tf.Tensor( 

2450 [[[ 256] 

2451 [ 512]] 

2452 [[32512] 

2453 [32512]]], shape=(2, 2, 1), dtype=int16) 

2454 >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8) 

2455 >>> print(c_int8_back) 

2456 tf.Tensor( 

2457 [[[ 1] 

2458 [ 2]] 

2459 [[127] 

2460 [127]]], shape=(2, 2, 1), dtype=int8) 

2461 

2462 Scaling down from an integer type to another integer type can be a lossy 

2463 conversion. Notice in the example below that converting `int16` to `uint8` and 

2464 back to `int16` has lost precision. 

2465 

2466 >>> d = [[[1000], [2000]], [[3000], [4000]]] 

2467 >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16) 

2468 >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8) 

2469 >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16) 

2470 >>> print(d_int16_back) 

2471 tf.Tensor( 

2472 [[[ 896] 

2473 [1920]] 

2474 [[2944] 

2475 [3968]]], shape=(2, 2, 1), dtype=int16) 

2476 

2477 Note that converting from floating point inputs to integer types may lead to 

2478 over/underflow problems. Set saturate to `True` to avoid such problem in 

2479 problematic conversions. If enabled, saturation will clip the output into the 

2480 allowed range before performing a potentially dangerous cast (and only before 

2481 performing such a cast, i.e., when casting from a floating point to an integer 

2482 type, and when casting from a signed to an unsigned type; `saturate` has no 

2483 effect on casts between floats, or on casts that increase the type's range). 

2484 

2485 Args: 

2486 image: An image. 

2487 dtype: A `DType` to convert `image` to. 

2488 saturate: If `True`, clip the input before casting (if necessary). 

2489 name: A name for this operation (optional). 

2490 

2491 Returns: 

2492 `image`, converted to `dtype`. 

2493 

2494 Raises: 

2495 AttributeError: Raises an attribute error when dtype is neither 

2496 float nor integer. 

2497 """ 

2498 image = ops.convert_to_tensor(image, name='image') 

2499 dtype = dtypes.as_dtype(dtype) 

2500 if not dtype.is_floating and not dtype.is_integer: 

2501 raise AttributeError('dtype must be either floating point or integer') 

2502 if not image.dtype.is_floating and not image.dtype.is_integer: 

2503 raise AttributeError('image dtype must be either floating point or integer') 

2504 if dtype == image.dtype: 

2505 return array_ops.identity(image, name=name) 

2506 

2507 with ops.name_scope(name, 'convert_image', [image]) as name: 

2508 # Both integer: use integer multiplication in the larger range 

2509 if image.dtype.is_integer and dtype.is_integer: 

2510 scale_in = image.dtype.max 

2511 scale_out = dtype.max 

2512 if scale_in > scale_out: 

2513 # Scaling down, scale first, then cast. The scaling factor will 

2514 # cause in.max to be mapped to above out.max but below out.max+1, 

2515 # so that the output is safely in the supported range. 

2516 scale = (scale_in + 1) // (scale_out + 1) 

2517 scaled = math_ops.floordiv(image, scale) 

2518 

2519 if saturate: 

2520 return math_ops.saturate_cast(scaled, dtype, name=name) 

2521 else: 

2522 return math_ops.cast(scaled, dtype, name=name) 

2523 else: 

2524 # Scaling up, cast first, then scale. The scale will not map in.max to 

2525 # out.max, but converting back and forth should result in no change. 

2526 if saturate: 

2527 cast = math_ops.saturate_cast(image, dtype) 

2528 else: 

2529 cast = math_ops.cast(image, dtype) 

2530 scale = (scale_out + 1) // (scale_in + 1) 

2531 return math_ops.multiply(cast, scale, name=name) 

2532 elif image.dtype.is_floating and dtype.is_floating: 

2533 # Both float: Just cast, no possible overflows in the allowed ranges. 

2534 # Note: We're ignoring float overflows. If your image dynamic range 

2535 # exceeds float range, you're on your own. 

2536 return math_ops.cast(image, dtype, name=name) 

2537 else: 

2538 if image.dtype.is_integer: 

2539 # Converting to float: first cast, then scale. No saturation possible. 

2540 cast = math_ops.cast(image, dtype) 

2541 scale = 1. / image.dtype.max 

2542 return math_ops.multiply(cast, scale, name=name) 

2543 else: 

2544 # Converting from float: first scale, then cast 

2545 scale = dtype.max + 0.5 # avoid rounding problems in the cast 

2546 scaled = math_ops.multiply(image, scale) 

2547 if saturate: 

2548 return math_ops.saturate_cast(scaled, dtype, name=name) 

2549 else: 

2550 return math_ops.cast(scaled, dtype, name=name) 

2551 

2552 

2553@tf_export('image.rgb_to_grayscale') 

2554@dispatch.add_dispatch_support 

2555def rgb_to_grayscale(images, name=None): 

2556 """Converts one or more images from RGB to Grayscale. 

2557 

2558 Outputs a tensor of the same `DType` and rank as `images`. The size of the 

2559 last dimension of the output is 1, containing the Grayscale value of the 

2560 pixels. 

2561 

2562 >>> original = tf.constant([[[1.0, 2.0, 3.0]]]) 

2563 >>> converted = tf.image.rgb_to_grayscale(original) 

2564 >>> print(converted.numpy()) 

2565 [[[1.81...]]] 

2566 

2567 Args: 

2568 images: The RGB tensor to convert. The last dimension must have size 3 and 

2569 should contain RGB values. 

2570 name: A name for the operation (optional). 

2571 

2572 Returns: 

2573 The converted grayscale image(s). 

2574 """ 

2575 with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name: 

2576 images = ops.convert_to_tensor(images, name='images') 

2577 # Remember original dtype to so we can convert back if needed 

2578 orig_dtype = images.dtype 

2579 flt_image = convert_image_dtype(images, dtypes.float32) 

2580 

2581 # Reference for converting between RGB and grayscale. 

2582 # https://en.wikipedia.org/wiki/Luma_%28video%29 

2583 rgb_weights = [0.2989, 0.5870, 0.1140] 

2584 gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1]) 

2585 gray_float = array_ops.expand_dims(gray_float, -1) 

2586 return convert_image_dtype(gray_float, orig_dtype, name=name) 

2587 

2588 

2589@tf_export('image.grayscale_to_rgb') 

2590@dispatch.add_dispatch_support 

2591def grayscale_to_rgb(images, name=None): 

2592 """Converts one or more images from Grayscale to RGB. 

2593 

2594 Outputs a tensor of the same `DType` and rank as `images`. The size of the 

2595 last dimension of the output is 3, containing the RGB value of the pixels. 

2596 The input images' last dimension must be size 1. 

2597 

2598 >>> original = tf.constant([[[1.0], [2.0], [3.0]]]) 

2599 >>> converted = tf.image.grayscale_to_rgb(original) 

2600 >>> print(converted.numpy()) 

2601 [[[1. 1. 1.] 

2602 [2. 2. 2.] 

2603 [3. 3. 3.]]] 

2604 

2605 Args: 

2606 images: The Grayscale tensor to convert. The last dimension must be size 1. 

2607 name: A name for the operation (optional). 

2608 

2609 Returns: 

2610 The converted grayscale image(s). 

2611 """ 

2612 with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name: 

2613 images = _AssertGrayscaleImage(images) 

2614 

2615 images = ops.convert_to_tensor(images, name='images') 

2616 rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) 

2617 shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] + 

2618 [array_ops.expand_dims(3, 0)]) 

2619 multiples = array_ops.concat(shape_list, 0) 

2620 rgb = array_ops.tile(images, multiples, name=name) 

2621 rgb.set_shape(images.get_shape()[:-1].concatenate([3])) 

2622 return rgb 

2623 

2624 

2625# pylint: disable=invalid-name 

2626@tf_export('image.random_hue') 

2627@dispatch.add_dispatch_support 

2628def random_hue(image, max_delta, seed=None): 

2629 """Adjust the hue of RGB images by a random factor. 

2630 

2631 Equivalent to `adjust_hue()` but uses a `delta` randomly 

2632 picked in the interval `[-max_delta, max_delta)`. 

2633 

2634 `max_delta` must be in the interval `[0, 0.5]`. 

2635 

2636 Usage Example: 

2637 

2638 >>> x = [[[1.0, 2.0, 3.0], 

2639 ... [4.0, 5.0, 6.0]], 

2640 ... [[7.0, 8.0, 9.0], 

2641 ... [10.0, 11.0, 12.0]]] 

2642 >>> tf.image.random_hue(x, 0.2) 

2643 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 

2644 

2645 For producing deterministic results given a `seed` value, use 

2646 `tf.image.stateless_random_hue`. Unlike using the `seed` param with 

2647 `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same 

2648 results given the same seed independent of how many times the function is 

2649 called, and independent of global seed settings (e.g. tf.random.set_seed). 

2650 

2651 Args: 

2652 image: RGB image or images. The size of the last dimension must be 3. 

2653 max_delta: float. The maximum value for the random delta. 

2654 seed: An operation-specific seed. It will be used in conjunction with the 

2655 graph-level seed to determine the real seeds that will be used in this 

2656 operation. Please see the documentation of set_random_seed for its 

2657 interaction with the graph-level random seed. 

2658 

2659 Returns: 

2660 Adjusted image(s), same shape and DType as `image`. 

2661 

2662 Raises: 

2663 ValueError: if `max_delta` is invalid. 

2664 """ 

2665 if max_delta > 0.5: 

2666 raise ValueError('max_delta must be <= 0.5.') 

2667 

2668 if max_delta < 0: 

2669 raise ValueError('max_delta must be non-negative.') 

2670 

2671 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) 

2672 return adjust_hue(image, delta) 

2673 

2674 

2675@tf_export('image.stateless_random_hue', v1=[]) 

2676@dispatch.add_dispatch_support 

2677def stateless_random_hue(image, max_delta, seed): 

2678 """Adjust the hue of RGB images by a random factor deterministically. 

2679 

2680 Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the 

2681 interval `[-max_delta, max_delta)`. 

2682 

2683 Guarantees the same results given the same `seed` independent of how many 

2684 times the function is called, and independent of global seed settings (e.g. 

2685 `tf.random.set_seed`). 

2686 

2687 `max_delta` must be in the interval `[0, 0.5]`. 

2688 

2689 Usage Example: 

2690 

2691 >>> x = [[[1.0, 2.0, 3.0], 

2692 ... [4.0, 5.0, 6.0]], 

2693 ... [[7.0, 8.0, 9.0], 

2694 ... [10.0, 11.0, 12.0]]] 

2695 >>> seed = (1, 2) 

2696 >>> tf.image.stateless_random_hue(x, 0.2, seed) 

2697 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

2698 array([[[ 1.6514902, 1. , 3. ], 

2699 [ 4.65149 , 4. , 6. ]], 

2700 [[ 7.65149 , 7. , 9. ], 

2701 [10.65149 , 10. , 12. ]]], dtype=float32)> 

2702 

2703 Args: 

2704 image: RGB image or images. The size of the last dimension must be 3. 

2705 max_delta: float. The maximum value for the random delta. 

2706 seed: A shape [2] Tensor, the seed to the random number generator. Must have 

2707 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 

2708 

2709 Returns: 

2710 Adjusted image(s), same shape and DType as `image`. 

2711 

2712 Raises: 

2713 ValueError: if `max_delta` is invalid. 

2714 """ 

2715 if max_delta > 0.5: 

2716 raise ValueError('max_delta must be <= 0.5.') 

2717 

2718 if max_delta < 0: 

2719 raise ValueError('max_delta must be non-negative.') 

2720 

2721 delta = stateless_random_ops.stateless_random_uniform( 

2722 shape=[], minval=-max_delta, maxval=max_delta, seed=seed) 

2723 return adjust_hue(image, delta) 

2724 

2725 

2726@tf_export('image.adjust_hue') 

2727@dispatch.add_dispatch_support 

2728def adjust_hue(image, delta, name=None): 

2729 """Adjust hue of RGB images. 

2730 

2731 This is a convenience method that converts an RGB image to float 

2732 representation, converts it to HSV, adds an offset to the 

2733 hue channel, converts back to RGB and then back to the original 

2734 data type. If several adjustments are chained it is advisable to minimize 

2735 the number of redundant conversions. 

2736 

2737 `image` is an RGB image. The image hue is adjusted by converting the 

2738 image(s) to HSV and rotating the hue channel (H) by 

2739 `delta`. The image is then converted back to RGB. 

2740 

2741 `delta` must be in the interval `[-1, 1]`. 

2742 

2743 Usage Example: 

2744 

2745 >>> x = [[[1.0, 2.0, 3.0], 

2746 ... [4.0, 5.0, 6.0]], 

2747 ... [[7.0, 8.0, 9.0], 

2748 ... [10.0, 11.0, 12.0]]] 

2749 >>> tf.image.adjust_hue(x, 0.2) 

2750 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

2751 array([[[ 2.3999996, 1. , 3. ], 

2752 [ 5.3999996, 4. , 6. ]], 

2753 [[ 8.4 , 7. , 9. ], 

2754 [11.4 , 10. , 12. ]]], dtype=float32)> 

2755 

2756 Args: 

2757 image: RGB image or images. The size of the last dimension must be 3. 

2758 delta: float. How much to add to the hue channel. 

2759 name: A name for this operation (optional). 

2760 

2761 Returns: 

2762 Adjusted image(s), same shape and DType as `image`. 

2763 

2764 Raises: 

2765 InvalidArgumentError: image must have at least 3 dimensions. 

2766 InvalidArgumentError: The size of the last dimension must be 3. 

2767 ValueError: if `delta` is not in the interval of `[-1, 1]`. 

2768 

2769 Usage Example: 

2770 

2771 >>> image = [[[1, 2, 3], [4, 5, 6]], 

2772 ... [[7, 8, 9], [10, 11, 12]], 

2773 ... [[13, 14, 15], [16, 17, 18]]] 

2774 >>> image = tf.constant(image) 

2775 >>> tf.image.adjust_hue(image, 0.2) 

2776 <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy= 

2777 array([[[ 2, 1, 3], 

2778 [ 5, 4, 6]], 

2779 [[ 8, 7, 9], 

2780 [11, 10, 12]], 

2781 [[14, 13, 15], 

2782 [17, 16, 18]]], dtype=int32)> 

2783 """ 

2784 with ops.name_scope(name, 'adjust_hue', [image]) as name: 

2785 if context.executing_eagerly(): 

2786 if delta < -1 or delta > 1: 

2787 raise ValueError('delta must be in the interval [-1, 1]') 

2788 image = ops.convert_to_tensor(image, name='image') 

2789 # Remember original dtype to so we can convert back if needed 

2790 orig_dtype = image.dtype 

2791 if orig_dtype in (dtypes.float16, dtypes.float32): 

2792 flt_image = image 

2793 else: 

2794 flt_image = convert_image_dtype(image, dtypes.float32) 

2795 

2796 rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) 

2797 

2798 return convert_image_dtype(rgb_altered, orig_dtype) 

2799 

2800 

2801# pylint: disable=invalid-name 

2802@tf_export('image.random_jpeg_quality') 

2803@dispatch.add_dispatch_support 

2804def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None): 

2805 """Randomly changes jpeg encoding quality for inducing jpeg noise. 

2806 

2807 `min_jpeg_quality` must be in the interval `[0, 100]` and less than 

2808 `max_jpeg_quality`. 

2809 `max_jpeg_quality` must be in the interval `[0, 100]`. 

2810 

2811 Usage Example: 

2812 

2813 >>> x = tf.constant([[[1, 2, 3], 

2814 ... [4, 5, 6]], 

2815 ... [[7, 8, 9], 

2816 ... [10, 11, 12]]], dtype=tf.uint8) 

2817 >>> tf.image.random_jpeg_quality(x, 75, 95) 

2818 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=...> 

2819 

2820 For producing deterministic results given a `seed` value, use 

2821 `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param 

2822 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 

2823 same results given the same seed independent of how many times the function is 

2824 called, and independent of global seed settings (e.g. tf.random.set_seed). 

2825 

2826 Args: 

2827 image: 3D image. Size of the last dimension must be 1 or 3. 

2828 min_jpeg_quality: Minimum jpeg encoding quality to use. 

2829 max_jpeg_quality: Maximum jpeg encoding quality to use. 

2830 seed: An operation-specific seed. It will be used in conjunction with the 

2831 graph-level seed to determine the real seeds that will be used in this 

2832 operation. Please see the documentation of set_random_seed for its 

2833 interaction with the graph-level random seed. 

2834 

2835 Returns: 

2836 Adjusted image(s), same shape and DType as `image`. 

2837 

2838 Raises: 

2839 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid. 

2840 """ 

2841 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or 

2842 max_jpeg_quality > 100): 

2843 raise ValueError('jpeg encoding range must be between 0 and 100.') 

2844 

2845 if min_jpeg_quality >= max_jpeg_quality: 

2846 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.') 

2847 

2848 jpeg_quality = random_ops.random_uniform([], 

2849 min_jpeg_quality, 

2850 max_jpeg_quality, 

2851 seed=seed, 

2852 dtype=dtypes.int32) 

2853 return adjust_jpeg_quality(image, jpeg_quality) 

2854 

2855 

2856@tf_export('image.stateless_random_jpeg_quality', v1=[]) 

2857@dispatch.add_dispatch_support 

2858def stateless_random_jpeg_quality(image, 

2859 min_jpeg_quality, 

2860 max_jpeg_quality, 

2861 seed): 

2862 """Deterministically radomize jpeg encoding quality for inducing jpeg noise. 

2863 

2864 Guarantees the same results given the same `seed` independent of how many 

2865 times the function is called, and independent of global seed settings (e.g. 

2866 `tf.random.set_seed`). 

2867 

2868 `min_jpeg_quality` must be in the interval `[0, 100]` and less than 

2869 `max_jpeg_quality`. 

2870 `max_jpeg_quality` must be in the interval `[0, 100]`. 

2871 

2872 Usage Example: 

2873 

2874 >>> x = tf.constant([[[1, 2, 3], 

2875 ... [4, 5, 6]], 

2876 ... [[7, 8, 9], 

2877 ... [10, 11, 12]]], dtype=tf.uint8) 

2878 >>> seed = (1, 2) 

2879 >>> tf.image.stateless_random_jpeg_quality(x, 75, 95, seed) 

2880 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy= 

2881 array([[[ 0, 4, 5], 

2882 [ 1, 5, 6]], 

2883 [[ 5, 9, 10], 

2884 [ 5, 9, 10]]], dtype=uint8)> 

2885 

2886 Args: 

2887 image: 3D image. Size of the last dimension must be 1 or 3. 

2888 min_jpeg_quality: Minimum jpeg encoding quality to use. 

2889 max_jpeg_quality: Maximum jpeg encoding quality to use. 

2890 seed: A shape [2] Tensor, the seed to the random number generator. Must have 

2891 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 

2892 

2893 Returns: 

2894 Adjusted image(s), same shape and DType as `image`. 

2895 

2896 Raises: 

2897 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid. 

2898 """ 

2899 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or 

2900 max_jpeg_quality > 100): 

2901 raise ValueError('jpeg encoding range must be between 0 and 100.') 

2902 

2903 if min_jpeg_quality >= max_jpeg_quality: 

2904 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.') 

2905 

2906 jpeg_quality = stateless_random_ops.stateless_random_uniform( 

2907 shape=[], minval=min_jpeg_quality, maxval=max_jpeg_quality, seed=seed, 

2908 dtype=dtypes.int32) 

2909 return adjust_jpeg_quality(image, jpeg_quality) 

2910 

2911 

2912@tf_export('image.adjust_jpeg_quality') 

2913@dispatch.add_dispatch_support 

2914def adjust_jpeg_quality(image, jpeg_quality, name=None): 

2915 """Adjust jpeg encoding quality of an image. 

2916 

2917 This is a convenience method that converts an image to uint8 representation, 

2918 encodes it to jpeg with `jpeg_quality`, decodes it, and then converts back 

2919 to the original data type. 

2920 

2921 `jpeg_quality` must be in the interval `[0, 100]`. 

2922 

2923 Usage Examples: 

2924 

2925 >>> x = [[[0.01, 0.02, 0.03], 

2926 ... [0.04, 0.05, 0.06]], 

2927 ... [[0.07, 0.08, 0.09], 

2928 ... [0.10, 0.11, 0.12]]] 

2929 >>> x_jpeg = tf.image.adjust_jpeg_quality(x, 75) 

2930 >>> x_jpeg.numpy() 

2931 array([[[0.00392157, 0.01960784, 0.03137255], 

2932 [0.02745098, 0.04313726, 0.05490196]], 

2933 [[0.05882353, 0.07450981, 0.08627451], 

2934 [0.08235294, 0.09803922, 0.10980393]]], dtype=float32) 

2935 

2936 Note that floating point values are expected to have values in the range 

2937 [0,1) and values outside this range are clipped. 

2938 

2939 >>> x = [[[1.0, 2.0, 3.0], 

2940 ... [4.0, 5.0, 6.0]], 

2941 ... [[7.0, 8.0, 9.0], 

2942 ... [10.0, 11.0, 12.0]]] 

2943 >>> tf.image.adjust_jpeg_quality(x, 75) 

2944 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

2945 array([[[1., 1., 1.], 

2946 [1., 1., 1.]], 

2947 [[1., 1., 1.], 

2948 [1., 1., 1.]]], dtype=float32)> 

2949 

2950 Note that `jpeg_quality` 100 is still lossy compresson. 

2951 

2952 >>> x = tf.constant([[[1, 2, 3], 

2953 ... [4, 5, 6]], 

2954 ... [[7, 8, 9], 

2955 ... [10, 11, 12]]], dtype=tf.uint8) 

2956 >>> tf.image.adjust_jpeg_quality(x, 100) 

2957 <tf.Tensor: shape(2, 2, 3), dtype=uint8, numpy= 

2958 array([[[ 0, 1, 3], 

2959 [ 3, 4, 6]], 

2960 [[ 6, 7, 9], 

2961 [ 9, 10, 12]]], dtype=uint8)> 

2962 

2963 Args: 

2964 image: 3D image. The size of the last dimension must be None, 1 or 3. 

2965 jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality. 

2966 name: A name for this operation (optional). 

2967 

2968 Returns: 

2969 Adjusted image, same shape and DType as `image`. 

2970 

2971 Raises: 

2972 InvalidArgumentError: quality must be in [0,100] 

2973 InvalidArgumentError: image must have 1 or 3 channels 

2974 """ 

2975 with ops.name_scope(name, 'adjust_jpeg_quality', [image]): 

2976 image = ops.convert_to_tensor(image, name='image') 

2977 channels = image.shape.as_list()[-1] 

2978 # Remember original dtype to so we can convert back if needed 

2979 orig_dtype = image.dtype 

2980 image = convert_image_dtype(image, dtypes.uint8, saturate=True) 

2981 if not _is_tensor(jpeg_quality): 

2982 # If jpeg_quality is a int (not tensor). 

2983 jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32) 

2984 image = gen_image_ops.encode_jpeg_variable_quality(image, jpeg_quality) 

2985 

2986 image = gen_image_ops.decode_jpeg(image, channels=channels) 

2987 return convert_image_dtype(image, orig_dtype, saturate=True) 

2988 

2989 

2990@tf_export('image.random_saturation') 

2991@dispatch.add_dispatch_support 

2992def random_saturation(image, lower, upper, seed=None): 

2993 """Adjust the saturation of RGB images by a random factor. 

2994 

2995 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly 

2996 picked in the interval `[lower, upper)`. 

2997 

2998 Usage Example: 

2999 

3000 >>> x = [[[1.0, 2.0, 3.0], 

3001 ... [4.0, 5.0, 6.0]], 

3002 ... [[7.0, 8.0, 9.0], 

3003 ... [10.0, 11.0, 12.0]]] 

3004 >>> tf.image.random_saturation(x, 5, 10) 

3005 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

3006 array([[[ 0. , 1.5, 3. ], 

3007 [ 0. , 3. , 6. ]], 

3008 [[ 0. , 4.5, 9. ], 

3009 [ 0. , 6. , 12. ]]], dtype=float32)> 

3010 

3011 For producing deterministic results given a `seed` value, use 

3012 `tf.image.stateless_random_saturation`. Unlike using the `seed` param 

3013 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 

3014 same results given the same seed independent of how many times the function is 

3015 called, and independent of global seed settings (e.g. tf.random.set_seed). 

3016 

3017 Args: 

3018 image: RGB image or images. The size of the last dimension must be 3. 

3019 lower: float. Lower bound for the random saturation factor. 

3020 upper: float. Upper bound for the random saturation factor. 

3021 seed: An operation-specific seed. It will be used in conjunction with the 

3022 graph-level seed to determine the real seeds that will be used in this 

3023 operation. Please see the documentation of set_random_seed for its 

3024 interaction with the graph-level random seed. 

3025 

3026 Returns: 

3027 Adjusted image(s), same shape and DType as `image`. 

3028 

3029 Raises: 

3030 ValueError: if `upper <= lower` or if `lower < 0`. 

3031 """ 

3032 if upper <= lower: 

3033 raise ValueError('upper must be > lower.') 

3034 

3035 if lower < 0: 

3036 raise ValueError('lower must be non-negative.') 

3037 

3038 saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed) 

3039 return adjust_saturation(image, saturation_factor) 

3040 

3041 

3042@tf_export('image.stateless_random_saturation', v1=[]) 

3043@dispatch.add_dispatch_support 

3044def stateless_random_saturation(image, lower, upper, seed=None): 

3045 """Adjust the saturation of RGB images by a random factor deterministically. 

3046 

3047 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly 

3048 picked in the interval `[lower, upper)`. 

3049 

3050 Guarantees the same results given the same `seed` independent of how many 

3051 times the function is called, and independent of global seed settings (e.g. 

3052 `tf.random.set_seed`). 

3053 

3054 Usage Example: 

3055 

3056 >>> x = [[[1.0, 2.0, 3.0], 

3057 ... [4.0, 5.0, 6.0]], 

3058 ... [[7.0, 8.0, 9.0], 

3059 ... [10.0, 11.0, 12.0]]] 

3060 >>> seed = (1, 2) 

3061 >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed) 

3062 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

3063 array([[[ 1.1559395, 2.0779698, 3. ], 

3064 [ 4.1559396, 5.07797 , 6. ]], 

3065 [[ 7.1559396, 8.07797 , 9. ], 

3066 [10.155939 , 11.07797 , 12. ]]], dtype=float32)> 

3067 

3068 Args: 

3069 image: RGB image or images. The size of the last dimension must be 3. 

3070 lower: float. Lower bound for the random saturation factor. 

3071 upper: float. Upper bound for the random saturation factor. 

3072 seed: A shape [2] Tensor, the seed to the random number generator. Must have 

3073 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 

3074 

3075 Returns: 

3076 Adjusted image(s), same shape and DType as `image`. 

3077 

3078 Raises: 

3079 ValueError: if `upper <= lower` or if `lower < 0`. 

3080 """ 

3081 if upper <= lower: 

3082 raise ValueError('upper must be > lower.') 

3083 

3084 if lower < 0: 

3085 raise ValueError('lower must be non-negative.') 

3086 

3087 saturation_factor = stateless_random_ops.stateless_random_uniform( 

3088 shape=[], minval=lower, maxval=upper, seed=seed) 

3089 return adjust_saturation(image, saturation_factor) 

3090 

3091 

3092@tf_export('image.adjust_saturation') 

3093@dispatch.add_dispatch_support 

3094def adjust_saturation(image, saturation_factor, name=None): 

3095 """Adjust saturation of RGB images. 

3096 

3097 This is a convenience method that converts RGB images to float 

3098 representation, converts them to HSV, adds an offset to the 

3099 saturation channel, converts back to RGB and then back to the original 

3100 data type. If several adjustments are chained it is advisable to minimize 

3101 the number of redundant conversions. 

3102 

3103 `image` is an RGB image or images. The image saturation is adjusted by 

3104 converting the images to HSV and multiplying the saturation (S) channel by 

3105 `saturation_factor` and clipping. The images are then converted back to RGB. 

3106 

3107 `saturation_factor` must be in the interval `[0, inf)`. 

3108 

3109 Usage Example: 

3110 

3111 >>> x = [[[1.0, 2.0, 3.0], 

3112 ... [4.0, 5.0, 6.0]], 

3113 ... [[7.0, 8.0, 9.0], 

3114 ... [10.0, 11.0, 12.0]]] 

3115 >>> tf.image.adjust_saturation(x, 0.5) 

3116 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 

3117 array([[[ 2. , 2.5, 3. ], 

3118 [ 5. , 5.5, 6. ]], 

3119 [[ 8. , 8.5, 9. ], 

3120 [11. , 11.5, 12. ]]], dtype=float32)> 

3121 

3122 Args: 

3123 image: RGB image or images. The size of the last dimension must be 3. 

3124 saturation_factor: float. Factor to multiply the saturation by. 

3125 name: A name for this operation (optional). 

3126 

3127 Returns: 

3128 Adjusted image(s), same shape and DType as `image`. 

3129 

3130 Raises: 

3131 InvalidArgumentError: input must have 3 channels 

3132 """ 

3133 with ops.name_scope(name, 'adjust_saturation', [image]) as name: 

3134 image = ops.convert_to_tensor(image, name='image') 

3135 # Remember original dtype to so we can convert back if needed 

3136 orig_dtype = image.dtype 

3137 if orig_dtype in (dtypes.float16, dtypes.float32): 

3138 flt_image = image 

3139 else: 

3140 flt_image = convert_image_dtype(image, dtypes.float32) 

3141 

3142 adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor) 

3143 

3144 return convert_image_dtype(adjusted, orig_dtype) 

3145 

3146 

3147@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg']) 

3148def is_jpeg(contents, name=None): 

3149 r"""Convenience function to check if the 'contents' encodes a JPEG image. 

3150 

3151 Args: 

3152 contents: 0-D `string`. The encoded image bytes. 

3153 name: A name for the operation (optional) 

3154 

3155 Returns: 

3156 A scalar boolean tensor indicating if 'contents' may be a JPEG image. 

3157 is_jpeg is susceptible to false positives. 

3158 """ 

3159 # Normal JPEGs start with \xff\xd8\xff\xe0 

3160 # JPEG with EXIF starts with \xff\xd8\xff\xe1 

3161 # Use \xff\xd8\xff to cover both. 

3162 with ops.name_scope(name, 'is_jpeg'): 

3163 substr = string_ops.substr(contents, 0, 3) 

3164 return math_ops.equal(substr, b'\xff\xd8\xff', name=name) 

3165 

3166 

3167def _is_png(contents, name=None): 

3168 r"""Convenience function to check if the 'contents' encodes a PNG image. 

3169 

3170 Args: 

3171 contents: 0-D `string`. The encoded image bytes. 

3172 name: A name for the operation (optional) 

3173 

3174 Returns: 

3175 A scalar boolean tensor indicating if 'contents' may be a PNG image. 

3176 is_png is susceptible to false positives. 

3177 """ 

3178 with ops.name_scope(name, 'is_png'): 

3179 substr = string_ops.substr(contents, 0, 3) 

3180 return math_ops.equal(substr, b'\211PN', name=name) 

3181 

3182 

3183tf_export( 

3184 'io.decode_and_crop_jpeg', 

3185 'image.decode_and_crop_jpeg', 

3186 v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])( 

3187 dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg)) 

3188 

3189tf_export( 

3190 'io.decode_bmp', 

3191 'image.decode_bmp', 

3192 v1=['io.decode_bmp', 'image.decode_bmp'])( 

3193 dispatch.add_dispatch_support(gen_image_ops.decode_bmp)) 

3194tf_export( 

3195 'io.decode_gif', 

3196 'image.decode_gif', 

3197 v1=['io.decode_gif', 'image.decode_gif'])( 

3198 dispatch.add_dispatch_support(gen_image_ops.decode_gif)) 

3199tf_export( 

3200 'io.decode_jpeg', 

3201 'image.decode_jpeg', 

3202 v1=['io.decode_jpeg', 'image.decode_jpeg'])( 

3203 dispatch.add_dispatch_support(gen_image_ops.decode_jpeg)) 

3204tf_export( 

3205 'io.decode_png', 

3206 'image.decode_png', 

3207 v1=['io.decode_png', 'image.decode_png'])( 

3208 dispatch.add_dispatch_support(gen_image_ops.decode_png)) 

3209 

3210tf_export( 

3211 'io.encode_jpeg', 

3212 'image.encode_jpeg', 

3213 v1=['io.encode_jpeg', 'image.encode_jpeg'])( 

3214 dispatch.add_dispatch_support(gen_image_ops.encode_jpeg)) 

3215tf_export( 

3216 'io.extract_jpeg_shape', 

3217 'image.extract_jpeg_shape', 

3218 v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])( 

3219 dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape)) 

3220 

3221 

3222@tf_export('io.encode_png', 'image.encode_png') 

3223@dispatch.add_dispatch_support 

3224def encode_png(image, compression=-1, name=None): 

3225 r"""PNG-encode an image. 

3226 

3227 `image` is a rank-N Tensor of type uint8 or uint16 with shape `batch_dims + 

3228 [height, width, channels]`, where `channels` is: 

3229 

3230 * 1: for grayscale. 

3231 * 2: for grayscale + alpha. 

3232 * 3: for RGB. 

3233 * 4: for RGBA. 

3234 

3235 The ZLIB compression level, `compression`, can be -1 for the PNG-encoder 

3236 default or a value from 0 to 9. 9 is the highest compression level, 

3237 generating the smallest output, but is slower. 

3238 

3239 Args: 

3240 image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`. 

3241 Rank N >= 3 with shape `batch_dims + [height, width, channels]`. 

3242 compression: An optional `int`. Defaults to `-1`. Compression level. 

3243 name: A name for the operation (optional). 

3244 

3245 Returns: 

3246 A `Tensor` of type `string`. 

3247 """ 

3248 return gen_image_ops.encode_png( 

3249 ops.convert_to_tensor(image), compression, name) 

3250 

3251 

3252@tf_export( 

3253 'io.decode_image', 

3254 'image.decode_image', 

3255 v1=['io.decode_image', 'image.decode_image']) 

3256@dispatch.add_dispatch_support 

3257def decode_image(contents, 

3258 channels=None, 

3259 dtype=dtypes.uint8, 

3260 name=None, 

3261 expand_animations=True): 

3262 """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`. 

3263 

3264 Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the 

3265 appropriate operation to convert the input bytes `string` into a `Tensor` 

3266 of type `dtype`. 

3267 

3268 Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as 

3269 opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D 

3270 arrays `[height, width, num_channels]`. Make sure to take this into account 

3271 when constructing your graph if you are intermixing GIF files with BMP, JPEG, 

3272 and/or PNG files. Alternately, set the `expand_animations` argument of this 

3273 function to `False`, in which case the op will return 3-dimensional tensors 

3274 and will truncate animated GIF files to the first frame. 

3275 

3276 NOTE: If the first frame of an animated GIF does not occupy the entire 

3277 canvas (maximum frame width x maximum frame height), then it fills the 

3278 unoccupied areas (in the first frame) with zeros (black). For frames after the 

3279 first frame that does not occupy the entire canvas, it uses the previous 

3280 frame to fill the unoccupied areas. 

3281 

3282 Args: 

3283 contents: A `Tensor` of type `string`. 0-D. The encoded image bytes. 

3284 channels: An optional `int`. Defaults to `0`. Number of color channels for 

3285 the decoded image. 

3286 dtype: The desired DType of the returned `Tensor`. 

3287 name: A name for the operation (optional) 

3288 expand_animations: An optional `bool`. Defaults to `True`. Controls the 

3289 shape of the returned op's output. If `True`, the returned op will produce 

3290 a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all GIFs, 

3291 whether animated or not. If, `False`, the returned op will produce a 3-D 

3292 tensor for all file types and will truncate animated GIFs to the first 

3293 frame. 

3294 

3295 Returns: 

3296 `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on 

3297 the file type and the value of the `expand_animations` parameter. 

3298 

3299 Raises: 

3300 ValueError: On incorrect number of channels. 

3301 """ 

3302 with ops.name_scope(name, 'decode_image'): 

3303 channels = 0 if channels is None else channels 

3304 if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]: 

3305 dest_dtype = dtype 

3306 dtype = dtypes.uint16 

3307 return convert_image_dtype( 

3308 gen_image_ops.decode_image( 

3309 contents=contents, 

3310 channels=channels, 

3311 expand_animations=expand_animations, 

3312 dtype=dtype), dest_dtype) 

3313 else: 

3314 return gen_image_ops.decode_image( 

3315 contents=contents, 

3316 channels=channels, 

3317 expand_animations=expand_animations, 

3318 dtype=dtype) 

3319 

3320 

3321@tf_export('image.total_variation') 

3322@dispatch.add_dispatch_support 

3323def total_variation(images, name=None): 

3324 """Calculate and return the total variation for one or more images. 

3325 

3326 The total variation is the sum of the absolute differences for neighboring 

3327 pixel-values in the input images. This measures how much noise is in the 

3328 images. 

3329 

3330 This can be used as a loss-function during optimization so as to suppress 

3331 noise in images. If you have a batch of images, then you should calculate 

3332 the scalar loss-value as the sum: 

3333 `loss = tf.reduce_sum(tf.image.total_variation(images))` 

3334 

3335 This implements the anisotropic 2-D version of the formula described here: 

3336 

3337 https://en.wikipedia.org/wiki/Total_variation_denoising 

3338 

3339 Args: 

3340 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 

3341 of shape `[height, width, channels]`. 

3342 name: A name for the operation (optional). 

3343 

3344 Raises: 

3345 ValueError: if images.shape is not a 3-D or 4-D vector. 

3346 

3347 Returns: 

3348 The total variation of `images`. 

3349 

3350 If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with the 

3351 total variation for each image in the batch. 

3352 If `images` was 3-D, return a scalar float with the total variation for 

3353 that image. 

3354 """ 

3355 

3356 with ops.name_scope(name, 'total_variation'): 

3357 ndims = images.get_shape().ndims 

3358 

3359 if ndims == 3: 

3360 # The input is a single image with shape [height, width, channels]. 

3361 

3362 # Calculate the difference of neighboring pixel-values. 

3363 # The images are shifted one pixel along the height and width by slicing. 

3364 pixel_dif1 = images[1:, :, :] - images[:-1, :, :] 

3365 pixel_dif2 = images[:, 1:, :] - images[:, :-1, :] 

3366 

3367 # Sum for all axis. (None is an alias for all axis.) 

3368 sum_axis = None 

3369 elif ndims == 4: 

3370 # The input is a batch of images with shape: 

3371 # [batch, height, width, channels]. 

3372 

3373 # Calculate the difference of neighboring pixel-values. 

3374 # The images are shifted one pixel along the height and width by slicing. 

3375 pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :] 

3376 pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :] 

3377 

3378 # Only sum for the last 3 axis. 

3379 # This results in a 1-D tensor with the total variation for each image. 

3380 sum_axis = [1, 2, 3] 

3381 else: 

3382 raise ValueError('\'images\' must be either 3 or 4-dimensional.') 

3383 

3384 # Calculate the total variation by taking the absolute value of the 

3385 # pixel-differences and summing over the appropriate axis. 

3386 tot_var = ( 

3387 math_ops.reduce_sum(math_ops.abs(pixel_dif1), axis=sum_axis) + 

3388 math_ops.reduce_sum(math_ops.abs(pixel_dif2), axis=sum_axis)) 

3389 

3390 return tot_var 

3391 

3392 

3393@tf_export('image.sample_distorted_bounding_box', v1=[]) 

3394@dispatch.add_dispatch_support 

3395def sample_distorted_bounding_box_v2(image_size, 

3396 bounding_boxes, 

3397 seed=0, 

3398 min_object_covered=0.1, 

3399 aspect_ratio_range=None, 

3400 area_range=None, 

3401 max_attempts=None, 

3402 use_image_if_no_bounding_boxes=None, 

3403 name=None): 

3404 """Generate a single randomly distorted bounding box for an image. 

3405 

3406 Bounding box annotations are often supplied in addition to ground-truth labels 

3407 in image recognition or object localization tasks. A common technique for 

3408 training such a system is to randomly distort an image while preserving 

3409 its content, i.e. *data augmentation*. This Op outputs a randomly distorted 

3410 localization of an object, i.e. bounding box, given an `image_size`, 

3411 `bounding_boxes` and a series of constraints. 

3412 

3413 The output of this Op is a single bounding box that may be used to crop the 

3414 original image. The output is returned as 3 tensors: `begin`, `size` and 

3415 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 

3416 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 

3417 visualize what the bounding box looks like. 

3418 

3419 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 

3420 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 

3421 and the height of the underlying image. 

3422 

3423 For example, 

3424 

3425 ```python 

3426 # Generate a single distorted bounding box. 

3427 begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( 

3428 tf.shape(image), 

3429 bounding_boxes=bounding_boxes, 

3430 min_object_covered=0.1) 

3431 

3432 # Draw the bounding box in an image summary. 

3433 image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 

3434 bbox_for_draw) 

3435 tf.compat.v1.summary.image('images_with_box', image_with_box) 

3436 

3437 # Employ the bounding box to distort the image. 

3438 distorted_image = tf.slice(image, begin, size) 

3439 ``` 

3440 

3441 Note that if no bounding box information is available, setting 

3442 `use_image_if_no_bounding_boxes = true` will assume there is a single implicit 

3443 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 

3444 false and no bounding boxes are supplied, an error is raised. 

3445 

3446 For producing deterministic results given a `seed` value, use 

3447 `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed` 

3448 param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops 

3449 guarantee the same results given the same seed independent of how many times 

3450 the function is called, and independent of global seed settings 

3451 (e.g. tf.random.set_seed). 

3452 

3453 Args: 

3454 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 

3455 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 

3456 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 

3457 describing the N bounding boxes associated with the image. 

3458 seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the 

3459 random number generator is seeded by the given `seed`. Otherwise, it is 

3460 seeded by a random seed. 

3461 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 

3462 cropped area of the image must contain at least this fraction of any 

3463 bounding box supplied. The value of this parameter should be non-negative. 

3464 In the case of 0, the cropped area does not need to overlap any of the 

3465 bounding boxes supplied. 

3466 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 

3467 1.33]`. The cropped area of the image must have an aspect `ratio = width / 

3468 height` within this range. 

3469 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 

3470 cropped area of the image must contain a fraction of the supplied image 

3471 within this range. 

3472 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 

3473 generating a cropped region of the image of the specified constraints. 

3474 After `max_attempts` failures, return the entire image. 

3475 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 

3476 Controls behavior if no bounding boxes supplied. If true, assume an 

3477 implicit bounding box covering the whole input. If false, raise an error. 

3478 name: A name for the operation (optional). 

3479 

3480 Returns: 

3481 A tuple of `Tensor` objects (begin, size, bboxes). 

3482 

3483 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 

3484 `[offset_height, offset_width, 0]`. Provide as input to 

3485 `tf.slice`. 

3486 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 

3487 `[target_height, target_width, -1]`. Provide as input to 

3488 `tf.slice`. 

3489 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 

3490 the distorted bounding box. 

3491 Provide as input to `tf.image.draw_bounding_boxes`. 

3492 

3493 Raises: 

3494 ValueError: If no seed is specified and op determinism is enabled. 

3495 """ 

3496 if seed: 

3497 seed1, seed2 = random_seed.get_seed(seed) 

3498 else: 

3499 if config.is_op_determinism_enabled(): 

3500 raise ValueError( 

3501 f'tf.image.sample_distorted_bounding_box requires a non-zero seed to ' 

3502 f'be passed in when determinism is enabled, but got seed={seed}. ' 

3503 f'Please pass in a non-zero seed, e.g. by passing "seed=1".') 

3504 seed1, seed2 = (0, 0) 

3505 with ops.name_scope(name, 'sample_distorted_bounding_box'): 

3506 return gen_image_ops.sample_distorted_bounding_box_v2( 

3507 image_size, 

3508 bounding_boxes, 

3509 seed=seed1, 

3510 seed2=seed2, 

3511 min_object_covered=min_object_covered, 

3512 aspect_ratio_range=aspect_ratio_range, 

3513 area_range=area_range, 

3514 max_attempts=max_attempts, 

3515 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 

3516 name=name) 

3517 

3518 

3519@tf_export('image.stateless_sample_distorted_bounding_box', v1=[]) 

3520@dispatch.add_dispatch_support 

3521def stateless_sample_distorted_bounding_box(image_size, 

3522 bounding_boxes, 

3523 seed, 

3524 min_object_covered=0.1, 

3525 aspect_ratio_range=None, 

3526 area_range=None, 

3527 max_attempts=None, 

3528 use_image_if_no_bounding_boxes=None, 

3529 name=None): 

3530 """Generate a randomly distorted bounding box for an image deterministically. 

3531 

3532 Bounding box annotations are often supplied in addition to ground-truth labels 

3533 in image recognition or object localization tasks. A common technique for 

3534 training such a system is to randomly distort an image while preserving 

3535 its content, i.e. *data augmentation*. This Op, given the same `seed`, 

3536 deterministically outputs a randomly distorted localization of an object, i.e. 

3537 bounding box, given an `image_size`, `bounding_boxes` and a series of 

3538 constraints. 

3539 

3540 The output of this Op is a single bounding box that may be used to crop the 

3541 original image. The output is returned as 3 tensors: `begin`, `size` and 

3542 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 

3543 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 

3544 visualize what the bounding box looks like. 

3545 

3546 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 

3547 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 

3548 and the height of the underlying image. 

3549 

3550 The output of this Op is guaranteed to be the same given the same `seed` and 

3551 is independent of how many times the function is called, and independent of 

3552 global seed settings (e.g. `tf.random.set_seed`). 

3553 

3554 Example usage: 

3555 

3556 >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]]) 

3557 >>> bbox = tf.constant( 

3558 ... [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 

3559 >>> seed = (1, 2) 

3560 >>> # Generate a single distorted bounding box. 

3561 >>> bbox_begin, bbox_size, bbox_draw = ( 

3562 ... tf.image.stateless_sample_distorted_bounding_box( 

3563 ... tf.shape(image), bounding_boxes=bbox, seed=seed)) 

3564 >>> # Employ the bounding box to distort the image. 

3565 >>> tf.slice(image, bbox_begin, bbox_size) 

3566 <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy= 

3567 array([[[1], 

3568 [2]], 

3569 [[4], 

3570 [5]]])> 

3571 >>> # Draw the bounding box in an image summary. 

3572 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 

3573 >>> tf.image.draw_bounding_boxes( 

3574 ... tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors) 

3575 <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy= 

3576 array([[[[1.], 

3577 [1.], 

3578 [3.]], 

3579 [[1.], 

3580 [1.], 

3581 [6.]], 

3582 [[7.], 

3583 [8.], 

3584 [9.]]]], dtype=float32)> 

3585 

3586 Note that if no bounding box information is available, setting 

3587 `use_image_if_no_bounding_boxes = true` will assume there is a single implicit 

3588 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 

3589 false and no bounding boxes are supplied, an error is raised. 

3590 

3591 Args: 

3592 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 

3593 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 

3594 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 

3595 describing the N bounding boxes associated with the image. 

3596 seed: A shape [2] Tensor, the seed to the random number generator. Must have 

3597 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 

3598 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 

3599 cropped area of the image must contain at least this fraction of any 

3600 bounding box supplied. The value of this parameter should be non-negative. 

3601 In the case of 0, the cropped area does not need to overlap any of the 

3602 bounding boxes supplied. 

3603 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 

3604 1.33]`. The cropped area of the image must have an aspect `ratio = width / 

3605 height` within this range. 

3606 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 

3607 cropped area of the image must contain a fraction of the supplied image 

3608 within this range. 

3609 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 

3610 generating a cropped region of the image of the specified constraints. 

3611 After `max_attempts` failures, return the entire image. 

3612 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 

3613 Controls behavior if no bounding boxes supplied. If true, assume an 

3614 implicit bounding box covering the whole input. If false, raise an error. 

3615 name: A name for the operation (optional). 

3616 

3617 Returns: 

3618 A tuple of `Tensor` objects (begin, size, bboxes). 

3619 

3620 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 

3621 `[offset_height, offset_width, 0]`. Provide as input to 

3622 `tf.slice`. 

3623 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 

3624 `[target_height, target_width, -1]`. Provide as input to 

3625 `tf.slice`. 

3626 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 

3627 the distorted bounding box. 

3628 Provide as input to `tf.image.draw_bounding_boxes`. 

3629 """ 

3630 with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'): 

3631 return gen_image_ops.stateless_sample_distorted_bounding_box( 

3632 image_size=image_size, 

3633 bounding_boxes=bounding_boxes, 

3634 seed=seed, 

3635 min_object_covered=min_object_covered, 

3636 aspect_ratio_range=aspect_ratio_range, 

3637 area_range=area_range, 

3638 max_attempts=max_attempts, 

3639 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 

3640 name=name) 

3641 

3642 

3643@tf_export(v1=['image.sample_distorted_bounding_box']) 

3644@dispatch.add_dispatch_support 

3645@deprecation.deprecated( 

3646 date=None, 

3647 instructions='`seed2` arg is deprecated.' 

3648 'Use sample_distorted_bounding_box_v2 instead.') 

3649def sample_distorted_bounding_box(image_size, 

3650 bounding_boxes, 

3651 seed=None, 

3652 seed2=None, 

3653 min_object_covered=0.1, 

3654 aspect_ratio_range=None, 

3655 area_range=None, 

3656 max_attempts=None, 

3657 use_image_if_no_bounding_boxes=None, 

3658 name=None): 

3659 """Generate a single randomly distorted bounding box for an image. 

3660 

3661 Bounding box annotations are often supplied in addition to ground-truth labels 

3662 in image recognition or object localization tasks. A common technique for 

3663 training such a system is to randomly distort an image while preserving 

3664 its content, i.e. *data augmentation*. This Op outputs a randomly distorted 

3665 localization of an object, i.e. bounding box, given an `image_size`, 

3666 `bounding_boxes` and a series of constraints. 

3667 

3668 The output of this Op is a single bounding box that may be used to crop the 

3669 original image. The output is returned as 3 tensors: `begin`, `size` and 

3670 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 

3671 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 

3672 visualize what the bounding box looks like. 

3673 

3674 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 

3675 The 

3676 bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and 

3677 height of the underlying image. 

3678 

3679 For example, 

3680 

3681 ```python 

3682 # Generate a single distorted bounding box. 

3683 begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( 

3684 tf.shape(image), 

3685 bounding_boxes=bounding_boxes, 

3686 min_object_covered=0.1) 

3687 

3688 # Draw the bounding box in an image summary. 

3689 image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 

3690 bbox_for_draw) 

3691 tf.compat.v1.summary.image('images_with_box', image_with_box) 

3692 

3693 # Employ the bounding box to distort the image. 

3694 distorted_image = tf.slice(image, begin, size) 

3695 ``` 

3696 

3697 Note that if no bounding box information is available, setting 

3698 `use_image_if_no_bounding_boxes = True` will assume there is a single implicit 

3699 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 

3700 false and no bounding boxes are supplied, an error is raised. 

3701 

3702 Args: 

3703 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 

3704 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 

3705 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 

3706 describing the N bounding boxes associated with the image. 

3707 seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are 

3708 set to non-zero, the random number generator is seeded by the given 

3709 `seed`. Otherwise, it is seeded by a random seed. 

3710 seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed 

3711 collision. 

3712 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 

3713 cropped area of the image must contain at least this fraction of any 

3714 bounding box supplied. The value of this parameter should be non-negative. 

3715 In the case of 0, the cropped area does not need to overlap any of the 

3716 bounding boxes supplied. 

3717 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 

3718 1.33]`. The cropped area of the image must have an aspect ratio = width / 

3719 height within this range. 

3720 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 

3721 cropped area of the image must contain a fraction of the supplied image 

3722 within this range. 

3723 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 

3724 generating a cropped region of the image of the specified constraints. 

3725 After `max_attempts` failures, return the entire image. 

3726 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 

3727 Controls behavior if no bounding boxes supplied. If true, assume an 

3728 implicit bounding box covering the whole input. If false, raise an error. 

3729 name: A name for the operation (optional). 

3730 

3731 Returns: 

3732 A tuple of `Tensor` objects (begin, size, bboxes). 

3733 

3734 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 

3735 `[offset_height, offset_width, 0]`. Provide as input to 

3736 `tf.slice`. 

3737 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 

3738 `[target_height, target_width, -1]`. Provide as input to 

3739 `tf.slice`. 

3740 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 

3741 the distorted bounding box. 

3742 Provide as input to `tf.image.draw_bounding_boxes`. 

3743 

3744 Raises: 

3745 ValueError: If no seed is specified and op determinism is enabled. 

3746 """ 

3747 if not seed and not seed2 and config.is_op_determinism_enabled(): 

3748 raise ValueError( 

3749 f'tf.compat.v1.image.sample_distorted_bounding_box requires "seed" or ' 

3750 f'"seed2" to be non-zero when determinism is enabled. Please pass in ' 

3751 f'a non-zero seed, e.g. by passing "seed=1". Got seed={seed} and ' 

3752 f"seed2={seed2}") 

3753 with ops.name_scope(name, 'sample_distorted_bounding_box'): 

3754 return gen_image_ops.sample_distorted_bounding_box_v2( 

3755 image_size, 

3756 bounding_boxes, 

3757 seed=seed, 

3758 seed2=seed2, 

3759 min_object_covered=min_object_covered, 

3760 aspect_ratio_range=aspect_ratio_range, 

3761 area_range=area_range, 

3762 max_attempts=max_attempts, 

3763 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 

3764 name=name) 

3765 

3766 

3767@tf_export('image.non_max_suppression') 

3768@dispatch.add_dispatch_support 

3769def non_max_suppression(boxes, 

3770 scores, 

3771 max_output_size, 

3772 iou_threshold=0.5, 

3773 score_threshold=float('-inf'), 

3774 name=None): 

3775 """Greedily selects a subset of bounding boxes in descending order of score. 

3776 

3777 Prunes away boxes that have high intersection-over-union (IOU) overlap 

3778 with previously selected boxes. Bounding boxes are supplied as 

3779 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 

3780 diagonal pair of box corners and the coordinates can be provided as normalized 

3781 (i.e., lying in the interval `[0, 1]`) or absolute. Note that this algorithm 

3782 is agnostic to where the origin is in the coordinate system. Note that this 

3783 algorithm is invariant to orthogonal transformations and translations 

3784 of the coordinate system; thus translating or reflections of the coordinate 

3785 system result in the same boxes being selected by the algorithm. 

3786 The output of this operation is a set of integers indexing into the input 

3787 collection of bounding boxes representing the selected boxes. The bounding 

3788 box coordinates corresponding to the selected indices can then be obtained 

3789 using the `tf.gather` operation. For example: 

3790 ```python 

3791 selected_indices = tf.image.non_max_suppression( 

3792 boxes, scores, max_output_size, iou_threshold) 

3793 selected_boxes = tf.gather(boxes, selected_indices) 

3794 ``` 

3795 

3796 Args: 

3797 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 

3798 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 

3799 score corresponding to each box (each row of boxes). 

3800 max_output_size: A scalar integer `Tensor` representing the maximum number 

3801 of boxes to be selected by non-max suppression. 

3802 iou_threshold: A 0-D float tensor representing the threshold for deciding 

3803 whether boxes overlap too much with respect to IOU. 

3804 score_threshold: A 0-D float tensor representing the threshold for deciding 

3805 when to remove boxes based on score. 

3806 name: A name for the operation (optional). 

3807 

3808 Returns: 

3809 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 

3810 selected indices from the boxes tensor, where `M <= max_output_size`. 

3811 """ 

3812 with ops.name_scope(name, 'non_max_suppression'): 

3813 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 

3814 score_threshold = ops.convert_to_tensor( 

3815 score_threshold, name='score_threshold') 

3816 return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size, 

3817 iou_threshold, score_threshold) 

3818 

3819 

3820@tf_export('image.non_max_suppression_with_scores') 

3821@dispatch.add_dispatch_support 

3822def non_max_suppression_with_scores(boxes, 

3823 scores, 

3824 max_output_size, 

3825 iou_threshold=0.5, 

3826 score_threshold=float('-inf'), 

3827 soft_nms_sigma=0.0, 

3828 name=None): 

3829 """Greedily selects a subset of bounding boxes in descending order of score. 

3830 

3831 Prunes away boxes that have high intersection-over-union (IOU) overlap 

3832 with previously selected boxes. Bounding boxes are supplied as 

3833 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 

3834 diagonal pair of box corners and the coordinates can be provided as normalized 

3835 (i.e., lying in the interval `[0, 1]`) or absolute. Note that this algorithm 

3836 is agnostic to where the origin is in the coordinate system. Note that this 

3837 algorithm is invariant to orthogonal transformations and translations 

3838 of the coordinate system; thus translating or reflections of the coordinate 

3839 system result in the same boxes being selected by the algorithm. 

3840 The output of this operation is a set of integers indexing into the input 

3841 collection of bounding boxes representing the selected boxes. The bounding 

3842 box coordinates corresponding to the selected indices can then be obtained 

3843 using the `tf.gather` operation. For example: 

3844 ```python 

3845 selected_indices, selected_scores = tf.image.non_max_suppression_padded( 

3846 boxes, scores, max_output_size, iou_threshold=1.0, score_threshold=0.1, 

3847 soft_nms_sigma=0.5) 

3848 selected_boxes = tf.gather(boxes, selected_indices) 

3849 ``` 

3850 

3851 This function generalizes the `tf.image.non_max_suppression` op by also 

3852 supporting a Soft-NMS (with Gaussian weighting) mode (c.f. 

3853 Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score 

3854 of other overlapping boxes instead of directly causing them to be pruned. 

3855 Consequently, in contrast to `tf.image.non_max_suppression`, 

3856 `tf.image.non_max_suppression_with_scores` returns the new scores of each 

3857 input box in the second output, `selected_scores`. 

3858 

3859 To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be 

3860 larger than 0. When `soft_nms_sigma` equals 0, the behavior of 

3861 `tf.image.non_max_suppression_with_scores` is identical to that of 

3862 `tf.image.non_max_suppression` (except for the extra output) both in function 

3863 and in running time. 

3864 

3865 Note that when `soft_nms_sigma` > 0, Soft-NMS is performed and `iou_threshold` 

3866 is ignored. `iou_threshold` is only used for standard NMS. 

3867 

3868 Args: 

3869 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 

3870 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 

3871 score corresponding to each box (each row of boxes). 

3872 max_output_size: A scalar integer `Tensor` representing the maximum number 

3873 of boxes to be selected by non-max suppression. 

3874 iou_threshold: A 0-D float tensor representing the threshold for deciding 

3875 whether boxes overlap too much with respect to IOU. 

3876 score_threshold: A 0-D float tensor representing the threshold for deciding 

3877 when to remove boxes based on score. 

3878 soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft 

3879 NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503). When 

3880 `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard) 

3881 NMS. 

3882 name: A name for the operation (optional). 

3883 

3884 Returns: 

3885 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 

3886 selected indices from the boxes tensor, where `M <= max_output_size`. 

3887 selected_scores: A 1-D float tensor of shape `[M]` representing the 

3888 corresponding scores for each selected box, where `M <= max_output_size`. 

3889 Scores only differ from corresponding input scores when using Soft NMS 

3890 (i.e. when `soft_nms_sigma>0`) 

3891 """ 

3892 with ops.name_scope(name, 'non_max_suppression_with_scores'): 

3893 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 

3894 score_threshold = ops.convert_to_tensor( 

3895 score_threshold, name='score_threshold') 

3896 soft_nms_sigma = ops.convert_to_tensor( 

3897 soft_nms_sigma, name='soft_nms_sigma') 

3898 (selected_indices, selected_scores, 

3899 _) = gen_image_ops.non_max_suppression_v5( 

3900 boxes, 

3901 scores, 

3902 max_output_size, 

3903 iou_threshold, 

3904 score_threshold, 

3905 soft_nms_sigma, 

3906 pad_to_max_output_size=False) 

3907 return selected_indices, selected_scores 

3908 

3909 

3910@tf_export('image.non_max_suppression_overlaps') 

3911@dispatch.add_dispatch_support 

3912def non_max_suppression_with_overlaps(overlaps, 

3913 scores, 

3914 max_output_size, 

3915 overlap_threshold=0.5, 

3916 score_threshold=float('-inf'), 

3917 name=None): 

3918 """Greedily selects a subset of bounding boxes in descending order of score. 

3919 

3920 Prunes away boxes that have high overlap with previously selected boxes. 

3921 N-by-n overlap values are supplied as square matrix. 

3922 The output of this operation is a set of integers indexing into the input 

3923 collection of bounding boxes representing the selected boxes. The bounding 

3924 box coordinates corresponding to the selected indices can then be obtained 

3925 using the `tf.gather` operation. For example: 

3926 ```python 

3927 selected_indices = tf.image.non_max_suppression_overlaps( 

3928 overlaps, scores, max_output_size, iou_threshold) 

3929 selected_boxes = tf.gather(boxes, selected_indices) 

3930 ``` 

3931 

3932 Args: 

3933 overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]` 

3934 representing the n-by-n box overlap values. 

3935 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 

3936 score corresponding to each box (each row of boxes). 

3937 max_output_size: A scalar integer `Tensor` representing the maximum number 

3938 of boxes to be selected by non-max suppression. 

3939 overlap_threshold: A 0-D float tensor representing the threshold for 

3940 deciding whether boxes overlap too much with respect to the provided 

3941 overlap values. 

3942 score_threshold: A 0-D float tensor representing the threshold for deciding 

3943 when to remove boxes based on score. 

3944 name: A name for the operation (optional). 

3945 

3946 Returns: 

3947 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 

3948 selected indices from the overlaps tensor, where `M <= max_output_size`. 

3949 """ 

3950 with ops.name_scope(name, 'non_max_suppression_overlaps'): 

3951 overlap_threshold = ops.convert_to_tensor( 

3952 overlap_threshold, name='overlap_threshold') 

3953 # pylint: disable=protected-access 

3954 return gen_image_ops.non_max_suppression_with_overlaps( 

3955 overlaps, scores, max_output_size, overlap_threshold, score_threshold) 

3956 # pylint: enable=protected-access 

3957 

3958 

3959_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115], 

3960 [0.587, -0.27455667, -0.52273617], 

3961 [0.114, -0.32134392, 0.31119955]] 

3962 

3963 

3964@tf_export('image.rgb_to_yiq') 

3965@dispatch.add_dispatch_support 

3966def rgb_to_yiq(images): 

3967 """Converts one or more images from RGB to YIQ. 

3968 

3969 Outputs a tensor of the same shape as the `images` tensor, containing the YIQ 

3970 value of the pixels. 

3971 The output is only well defined if the value in images are in [0,1]. 

3972 

3973 Usage Example: 

3974 

3975 >>> x = tf.constant([[[1.0, 2.0, 3.0]]]) 

3976 >>> tf.image.rgb_to_yiq(x) 

3977 <tf.Tensor: shape=(1, 1, 3), dtype=float32, 

3978 numpy=array([[[ 1.815 , -0.91724455, 0.09962624]]], dtype=float32)> 

3979 

3980 Args: 

3981 images: 2-D or higher rank. Image data to convert. Last dimension must be 

3982 size 3. 

3983 

3984 Returns: 

3985 images: tensor with the same shape as `images`. 

3986 """ 

3987 images = ops.convert_to_tensor(images, name='images') 

3988 kernel = ops.convert_to_tensor( 

3989 _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel') 

3990 ndims = images.get_shape().ndims 

3991 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 

3992 

3993 

3994_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021], 

3995 [0.6208248, -0.64720424, 1.70423049]] 

3996 

3997 

3998@tf_export('image.yiq_to_rgb') 

3999@dispatch.add_dispatch_support 

4000def yiq_to_rgb(images): 

4001 """Converts one or more images from YIQ to RGB. 

4002 

4003 Outputs a tensor of the same shape as the `images` tensor, containing the RGB 

4004 value of the pixels. 

4005 The output is only well defined if the Y value in images are in [0,1], 

4006 I value are in [-0.5957,0.5957] and Q value are in [-0.5226,0.5226]. 

4007 

4008 Args: 

4009 images: 2-D or higher rank. Image data to convert. Last dimension must be 

4010 size 3. 

4011 

4012 Returns: 

4013 images: tensor with the same shape as `images`. 

4014 """ 

4015 images = ops.convert_to_tensor(images, name='images') 

4016 kernel = ops.convert_to_tensor( 

4017 _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel') 

4018 ndims = images.get_shape().ndims 

4019 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 

4020 

4021 

4022_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538], 

4023 [0.587, -0.28886916, -0.51496512], 

4024 [0.114, 0.43601035, -0.10001026]] 

4025 

4026 

4027@tf_export('image.rgb_to_yuv') 

4028@dispatch.add_dispatch_support 

4029def rgb_to_yuv(images): 

4030 """Converts one or more images from RGB to YUV. 

4031 

4032 Outputs a tensor of the same shape as the `images` tensor, containing the YUV 

4033 value of the pixels. 

4034 The output is only well defined if the value in images are in [0, 1]. 

4035 There are two ways of representing an image: [0, 255] pixel values range or 

4036 [0, 1] (as float) pixel values range. Users need to convert the input image 

4037 into a float [0, 1] range. 

4038 

4039 Args: 

4040 images: 2-D or higher rank. Image data to convert. Last dimension must be 

4041 size 3. 

4042 

4043 Returns: 

4044 images: tensor with the same shape as `images`. 

4045 """ 

4046 images = ops.convert_to_tensor(images, name='images') 

4047 kernel = ops.convert_to_tensor( 

4048 _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel') 

4049 ndims = images.get_shape().ndims 

4050 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 

4051 

4052 

4053_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185], 

4054 [1.13988303, -0.58062185, 0]] 

4055 

4056 

4057@tf_export('image.yuv_to_rgb') 

4058@dispatch.add_dispatch_support 

4059def yuv_to_rgb(images): 

4060 """Converts one or more images from YUV to RGB. 

4061 

4062 Outputs a tensor of the same shape as the `images` tensor, containing the RGB 

4063 value of the pixels. 

4064 The output is only well defined if the Y value in images are in [0,1], 

4065 U and V value are in [-0.5,0.5]. 

4066 

4067 As per the above description, you need to scale your YUV images if their 

4068 pixel values are not in the required range. Below given example illustrates 

4069 preprocessing of each channel of images before feeding them to `yuv_to_rgb`. 

4070 

4071 ```python 

4072 yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255) 

4073 last_dimension_axis = len(yuv_images.shape) - 1 

4074 yuv_tensor_images = tf.truediv( 

4075 tf.subtract( 

4076 yuv_images, 

4077 tf.reduce_min(yuv_images) 

4078 ), 

4079 tf.subtract( 

4080 tf.reduce_max(yuv_images), 

4081 tf.reduce_min(yuv_images) 

4082 ) 

4083 ) 

4084 y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis) 

4085 target_uv_min, target_uv_max = -0.5, 0.5 

4086 u = u * (target_uv_max - target_uv_min) + target_uv_min 

4087 v = v * (target_uv_max - target_uv_min) + target_uv_min 

4088 preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis) 

4089 rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images) 

4090 ``` 

4091 

4092 Args: 

4093 images: 2-D or higher rank. Image data to convert. Last dimension must be 

4094 size 3. 

4095 

4096 Returns: 

4097 images: tensor with the same shape as `images`. 

4098 """ 

4099 images = ops.convert_to_tensor(images, name='images') 

4100 kernel = ops.convert_to_tensor( 

4101 _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel') 

4102 ndims = images.get_shape().ndims 

4103 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 

4104 

4105 

4106def _verify_compatible_image_shapes(img1, img2): 

4107 """Checks if two image tensors are compatible for applying SSIM or PSNR. 

4108 

4109 This function checks if two sets of images have ranks at least 3, and if the 

4110 last three dimensions match. 

4111 

4112 Args: 

4113 img1: Tensor containing the first image batch. 

4114 img2: Tensor containing the second image batch. 

4115 

4116 Returns: 

4117 A tuple containing: the first tensor shape, the second tensor shape, and a 

4118 list of control_flow_ops.Assert() ops implementing the checks. 

4119 

4120 Raises: 

4121 ValueError: When static shape check fails. 

4122 """ 

4123 shape1 = img1.get_shape().with_rank_at_least(3) 

4124 shape2 = img2.get_shape().with_rank_at_least(3) 

4125 shape1[-3:].assert_is_compatible_with(shape2[-3:]) 

4126 

4127 if shape1.ndims is not None and shape2.ndims is not None: 

4128 for dim1, dim2 in zip( 

4129 reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])): 

4130 if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)): 

4131 raise ValueError('Two images are not compatible: %s and %s' % 

4132 (shape1, shape2)) 

4133 

4134 # Now assign shape tensors. 

4135 shape1, shape2 = array_ops.shape_n([img1, img2]) 

4136 

4137 # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable. 

4138 checks = [] 

4139 checks.append( 

4140 control_flow_assert.Assert( 

4141 math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2], 

4142 summarize=10)) 

4143 checks.append( 

4144 control_flow_assert.Assert( 

4145 math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])), 

4146 [shape1, shape2], 

4147 summarize=10)) 

4148 return shape1, shape2, checks 

4149 

4150 

4151@tf_export('image.psnr') 

4152@dispatch.add_dispatch_support 

4153def psnr(a, b, max_val, name=None): 

4154 """Returns the Peak Signal-to-Noise Ratio between a and b. 

4155 

4156 This is intended to be used on signals (or images). Produces a PSNR value for 

4157 each image in batch. 

4158 

4159 The last three dimensions of input are expected to be [height, width, depth]. 

4160 

4161 Example: 

4162 

4163 ```python 

4164 # Read images from file. 

4165 im1 = tf.decode_png('path/to/im1.png') 

4166 im2 = tf.decode_png('path/to/im2.png') 

4167 # Compute PSNR over tf.uint8 Tensors. 

4168 psnr1 = tf.image.psnr(im1, im2, max_val=255) 

4169 

4170 # Compute PSNR over tf.float32 Tensors. 

4171 im1 = tf.image.convert_image_dtype(im1, tf.float32) 

4172 im2 = tf.image.convert_image_dtype(im2, tf.float32) 

4173 psnr2 = tf.image.psnr(im1, im2, max_val=1.0) 

4174 # psnr1 and psnr2 both have type tf.float32 and are almost equal. 

4175 ``` 

4176 

4177 Args: 

4178 a: First set of images. 

4179 b: Second set of images. 

4180 max_val: The dynamic range of the images (i.e., the difference between the 

4181 maximum the and minimum allowed values). 

4182 name: Namespace to embed the computation in. 

4183 

4184 Returns: 

4185 The scalar PSNR between a and b. The returned tensor has type `tf.float32` 

4186 and shape [batch_size, 1]. 

4187 """ 

4188 with ops.name_scope(name, 'PSNR', [a, b]): 

4189 # Need to convert the images to float32. Scale max_val accordingly so that 

4190 # PSNR is computed correctly. 

4191 max_val = math_ops.cast(max_val, a.dtype) 

4192 max_val = convert_image_dtype(max_val, dtypes.float32) 

4193 a = convert_image_dtype(a, dtypes.float32) 

4194 b = convert_image_dtype(b, dtypes.float32) 

4195 mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1]) 

4196 psnr_val = math_ops.subtract( 

4197 20 * math_ops.log(max_val) / math_ops.log(10.0), 

4198 np.float32(10 / np.log(10)) * math_ops.log(mse), 

4199 name='psnr') 

4200 

4201 _, _, checks = _verify_compatible_image_shapes(a, b) 

4202 with ops.control_dependencies(checks): 

4203 return array_ops.identity(psnr_val) 

4204 

4205 

4206def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03): 

4207 r"""Helper function for computing SSIM. 

4208 

4209 SSIM estimates covariances with weighted sums. The default parameters 

4210 use a biased estimate of the covariance: 

4211 Suppose `reducer` is a weighted sum, then the mean estimators are 

4212 \mu_x = \sum_i w_i x_i, 

4213 \mu_y = \sum_i w_i y_i, 

4214 where w_i's are the weighted-sum weights, and covariance estimator is 

4215 cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y) 

4216 with assumption \sum_i w_i = 1. This covariance estimator is biased, since 

4217 E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y). 

4218 For SSIM measure with unbiased covariance estimators, pass as `compensation` 

4219 argument (1 - \sum_i w_i ^ 2). 

4220 

4221 Args: 

4222 x: First set of images. 

4223 y: Second set of images. 

4224 reducer: Function that computes 'local' averages from the set of images. For 

4225 non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]), and 

4226 for convolutional version, this is usually tf.nn.avg_pool2d or 

4227 tf.nn.conv2d with weighted-sum kernel. 

4228 max_val: The dynamic range (i.e., the difference between the maximum 

4229 possible allowed value and the minimum allowed value). 

4230 compensation: Compensation factor. See above. 

4231 k1: Default value 0.01 

4232 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 

4233 it would be better if we took the values in the range of 0 < K2 < 0.4). 

4234 

4235 Returns: 

4236 A pair containing the luminance measure, and the contrast-structure measure. 

4237 """ 

4238 

4239 c1 = (k1 * max_val)**2 

4240 c2 = (k2 * max_val)**2 

4241 

4242 # SSIM luminance measure is 

4243 # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1). 

4244 mean0 = reducer(x) 

4245 mean1 = reducer(y) 

4246 num0 = mean0 * mean1 * 2.0 

4247 den0 = math_ops.square(mean0) + math_ops.square(mean1) 

4248 luminance = (num0 + c1) / (den0 + c1) 

4249 

4250 # SSIM contrast-structure measure is 

4251 # (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2). 

4252 # Note that `reducer` is a weighted sum with weight w_k, \sum_i w_i = 1, then 

4253 # cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y) 

4254 # = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j). 

4255 num1 = reducer(x * y) * 2.0 

4256 den1 = reducer(math_ops.square(x) + math_ops.square(y)) 

4257 c2 *= compensation 

4258 cs = (num1 - num0 + c2) / (den1 - den0 + c2) 

4259 

4260 # SSIM score is the product of the luminance and contrast-structure measures. 

4261 return luminance, cs 

4262 

4263 

4264def _fspecial_gauss(size, sigma): 

4265 """Function to mimic the 'fspecial' gaussian MATLAB function.""" 

4266 size = ops.convert_to_tensor(size, dtypes.int32) 

4267 sigma = ops.convert_to_tensor(sigma) 

4268 

4269 coords = math_ops.cast(math_ops.range(size), sigma.dtype) 

4270 coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0 

4271 

4272 g = math_ops.square(coords) 

4273 g *= -0.5 / math_ops.square(sigma) 

4274 

4275 g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1]) 

4276 g = array_ops.reshape(g, shape=[1, -1]) # For tf.nn.softmax(). 

4277 g = nn_ops.softmax(g) 

4278 return array_ops.reshape(g, shape=[size, size, 1, 1]) 

4279 

4280 

4281def _ssim_per_channel(img1, 

4282 img2, 

4283 max_val=1.0, 

4284 filter_size=11, 

4285 filter_sigma=1.5, 

4286 k1=0.01, 

4287 k2=0.03, 

4288 return_index_map=False): 

4289 """Computes SSIM index between img1 and img2 per color channel. 

4290 

4291 This function matches the standard SSIM implementation from: 

4292 Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image 

4293 quality assessment: from error visibility to structural similarity. IEEE 

4294 transactions on image processing. 

4295 

4296 Details: 

4297 - 11x11 Gaussian filter of width 1.5 is used. 

4298 - k1 = 0.01, k2 = 0.03 as in the original paper. 

4299 

4300 Args: 

4301 img1: First image batch. 

4302 img2: Second image batch. 

4303 max_val: The dynamic range of the images (i.e., the difference between the 

4304 maximum the and minimum allowed values). 

4305 filter_size: Default value 11 (size of gaussian filter). 

4306 filter_sigma: Default value 1.5 (width of gaussian filter). 

4307 k1: Default value 0.01 

4308 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 

4309 it would be better if we took the values in the range of 0 < K2 < 0.4). 

4310 return_index_map: If True returns local SSIM map instead of the global mean. 

4311 

4312 Returns: 

4313 A pair of tensors containing and channel-wise SSIM and contrast-structure 

4314 values. The shape is [..., channels]. 

4315 """ 

4316 filter_size = constant_op.constant(filter_size, dtype=dtypes.int32) 

4317 filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype) 

4318 

4319 shape1, shape2 = array_ops.shape_n([img1, img2]) 

4320 checks = [ 

4321 control_flow_assert.Assert( 

4322 math_ops.reduce_all( 

4323 math_ops.greater_equal(shape1[-3:-1], filter_size)), 

4324 [shape1, filter_size], 

4325 summarize=8), 

4326 control_flow_assert.Assert( 

4327 math_ops.reduce_all( 

4328 math_ops.greater_equal(shape2[-3:-1], filter_size)), 

4329 [shape2, filter_size], 

4330 summarize=8) 

4331 ] 

4332 

4333 # Enforce the check to run before computation. 

4334 with ops.control_dependencies(checks): 

4335 img1 = array_ops.identity(img1) 

4336 

4337 # TODO(sjhwang): Try to cache kernels and compensation factor. 

4338 kernel = _fspecial_gauss(filter_size, filter_sigma) 

4339 kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1]) 

4340 

4341 # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`, 

4342 # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead. 

4343 compensation = 1.0 

4344 

4345 # TODO(sjhwang): Try FFT. 

4346 # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying 

4347 # 1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter. 

4348 def reducer(x): 

4349 shape = array_ops.shape(x) 

4350 x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0)) 

4351 y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID') 

4352 return array_ops.reshape( 

4353 y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0)) 

4354 

4355 luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1, 

4356 k2) 

4357 

4358 # Average over the second and the third from the last: height, width. 

4359 if return_index_map: 

4360 ssim_val = luminance * cs 

4361 else: 

4362 axes = constant_op.constant([-3, -2], dtype=dtypes.int32) 

4363 ssim_val = math_ops.reduce_mean(luminance * cs, axes) 

4364 cs = math_ops.reduce_mean(cs, axes) 

4365 return ssim_val, cs 

4366 

4367 

4368@tf_export('image.ssim') 

4369@dispatch.add_dispatch_support 

4370def ssim(img1, 

4371 img2, 

4372 max_val, 

4373 filter_size=11, 

4374 filter_sigma=1.5, 

4375 k1=0.01, 

4376 k2=0.03, 

4377 return_index_map=False): 

4378 """Computes SSIM index between img1 and img2. 

4379 

4380 This function is based on the standard SSIM implementation from: 

4381 Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image 

4382 quality assessment: from error visibility to structural similarity. IEEE 

4383 transactions on image processing. 

4384 

4385 Note: The true SSIM is only defined on grayscale. This function does not 

4386 perform any colorspace transform. (If the input is already YUV, then it will 

4387 compute YUV SSIM average.) 

4388 

4389 Details: 

4390 - 11x11 Gaussian filter of width 1.5 is used. 

4391 - k1 = 0.01, k2 = 0.03 as in the original paper. 

4392 

4393 The image sizes must be at least 11x11 because of the filter size. 

4394 

4395 Example: 

4396 

4397 ```python 

4398 # Read images (of size 255 x 255) from file. 

4399 im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png')) 

4400 im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png')) 

4401 tf.shape(im1) # `img1.png` has 3 channels; shape is `(255, 255, 3)` 

4402 tf.shape(im2) # `img2.png` has 3 channels; shape is `(255, 255, 3)` 

4403 # Add an outer batch for each image. 

4404 im1 = tf.expand_dims(im1, axis=0) 

4405 im2 = tf.expand_dims(im2, axis=0) 

4406 # Compute SSIM over tf.uint8 Tensors. 

4407 ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11, 

4408 filter_sigma=1.5, k1=0.01, k2=0.03) 

4409 

4410 # Compute SSIM over tf.float32 Tensors. 

4411 im1 = tf.image.convert_image_dtype(im1, tf.float32) 

4412 im2 = tf.image.convert_image_dtype(im2, tf.float32) 

4413 ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11, 

4414 filter_sigma=1.5, k1=0.01, k2=0.03) 

4415 # ssim1 and ssim2 both have type tf.float32 and are almost equal. 

4416 ``` 

4417 

4418 Args: 

4419 img1: First image batch. 4-D Tensor of shape `[batch, height, width, 

4420 channels]` with only Positive Pixel Values. 

4421 img2: Second image batch. 4-D Tensor of shape `[batch, height, width, 

4422 channels]` with only Positive Pixel Values. 

4423 max_val: The dynamic range of the images (i.e., the difference between the 

4424 maximum the and minimum allowed values). 

4425 filter_size: Default value 11 (size of gaussian filter). 

4426 filter_sigma: Default value 1.5 (width of gaussian filter). 

4427 k1: Default value 0.01 

4428 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 

4429 it would be better if we took the values in the range of 0 < K2 < 0.4). 

4430 return_index_map: If True returns local SSIM map instead of the global mean. 

4431 

4432 Returns: 

4433 A tensor containing an SSIM value for each image in batch or a tensor 

4434 containing an SSIM value for each pixel for each image in batch if 

4435 return_index_map is True. Returned SSIM values are in range (-1, 1], when 

4436 pixel values are non-negative. Returns a tensor with shape: 

4437 broadcast(img1.shape[:-3], img2.shape[:-3]) or broadcast(img1.shape[:-1], 

4438 img2.shape[:-1]). 

4439 """ 

4440 with ops.name_scope(None, 'SSIM', [img1, img2]): 

4441 # Convert to tensor if needed. 

4442 img1 = ops.convert_to_tensor(img1, name='img1') 

4443 img2 = ops.convert_to_tensor(img2, name='img2') 

4444 # Shape checking. 

4445 _, _, checks = _verify_compatible_image_shapes(img1, img2) 

4446 with ops.control_dependencies(checks): 

4447 img1 = array_ops.identity(img1) 

4448 

4449 # Need to convert the images to float32. Scale max_val accordingly so that 

4450 # SSIM is computed correctly. 

4451 max_val = math_ops.cast(max_val, img1.dtype) 

4452 max_val = convert_image_dtype(max_val, dtypes.float32) 

4453 img1 = convert_image_dtype(img1, dtypes.float32) 

4454 img2 = convert_image_dtype(img2, dtypes.float32) 

4455 ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val, filter_size, 

4456 filter_sigma, k1, k2, 

4457 return_index_map) 

4458 # Compute average over color channels. 

4459 return math_ops.reduce_mean(ssim_per_channel, [-1]) 

4460 

4461 

4462# Default values obtained by Wang et al. 

4463_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333) 

4464 

4465 

4466@tf_export('image.ssim_multiscale') 

4467@dispatch.add_dispatch_support 

4468def ssim_multiscale(img1, 

4469 img2, 

4470 max_val, 

4471 power_factors=_MSSSIM_WEIGHTS, 

4472 filter_size=11, 

4473 filter_sigma=1.5, 

4474 k1=0.01, 

4475 k2=0.03): 

4476 """Computes the MS-SSIM between img1 and img2. 

4477 

4478 This function assumes that `img1` and `img2` are image batches, i.e. the last 

4479 three dimensions are [height, width, channels]. 

4480 

4481 Note: The true SSIM is only defined on grayscale. This function does not 

4482 perform any colorspace transform. (If the input is already YUV, then it will 

4483 compute YUV SSIM average.) 

4484 

4485 Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale 

4486 structural similarity for image quality assessment." Signals, Systems and 

4487 Computers, 2004. 

4488 

4489 Args: 

4490 img1: First image batch with only Positive Pixel Values. 

4491 img2: Second image batch with only Positive Pixel Values. Must have the 

4492 same rank as img1. 

4493 max_val: The dynamic range of the images (i.e., the difference between the 

4494 maximum the and minimum allowed values). 

4495 power_factors: Iterable of weights for each of the scales. The number of 

4496 scales used is the length of the list. Index 0 is the unscaled 

4497 resolution's weight and each increasing scale corresponds to the image 

4498 being downsampled by 2. Defaults to (0.0448, 0.2856, 0.3001, 0.2363, 

4499 0.1333), which are the values obtained in the original paper. 

4500 filter_size: Default value 11 (size of gaussian filter). 

4501 filter_sigma: Default value 1.5 (width of gaussian filter). 

4502 k1: Default value 0.01 

4503 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 

4504 it would be better if we took the values in the range of 0 < K2 < 0.4). 

4505 

4506 Returns: 

4507 A tensor containing an MS-SSIM value for each image in batch. The values 

4508 are in range [0, 1]. Returns a tensor with shape: 

4509 broadcast(img1.shape[:-3], img2.shape[:-3]). 

4510 """ 

4511 with ops.name_scope(None, 'MS-SSIM', [img1, img2]): 

4512 # Convert to tensor if needed. 

4513 img1 = ops.convert_to_tensor(img1, name='img1') 

4514 img2 = ops.convert_to_tensor(img2, name='img2') 

4515 # Shape checking. 

4516 shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2) 

4517 with ops.control_dependencies(checks): 

4518 img1 = array_ops.identity(img1) 

4519 

4520 # Need to convert the images to float32. Scale max_val accordingly so that 

4521 # SSIM is computed correctly. 

4522 max_val = math_ops.cast(max_val, img1.dtype) 

4523 max_val = convert_image_dtype(max_val, dtypes.float32) 

4524 img1 = convert_image_dtype(img1, dtypes.float32) 

4525 img2 = convert_image_dtype(img2, dtypes.float32) 

4526 

4527 imgs = [img1, img2] 

4528 shapes = [shape1, shape2] 

4529 

4530 # img1 and img2 are assumed to be a (multi-dimensional) batch of 

4531 # 3-dimensional images (height, width, channels). `heads` contain the batch 

4532 # dimensions, and `tails` contain the image dimensions. 

4533 heads = [s[:-3] for s in shapes] 

4534 tails = [s[-3:] for s in shapes] 

4535 

4536 divisor = [1, 2, 2, 1] 

4537 divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32) 

4538 

4539 def do_pad(images, remainder): 

4540 padding = array_ops.expand_dims(remainder, -1) 

4541 padding = array_ops.pad(padding, [[1, 0], [1, 0]]) 

4542 return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images] 

4543 

4544 mcs = [] 

4545 for k in range(len(power_factors)): 

4546 with ops.name_scope(None, 'Scale%d' % k, imgs): 

4547 if k > 0: 

4548 # Avg pool takes rank 4 tensors. Flatten leading dimensions. 

4549 flat_imgs = [ 

4550 array_ops.reshape(x, array_ops.concat([[-1], t], 0)) 

4551 for x, t in zip(imgs, tails) 

4552 ] 

4553 

4554 remainder = tails[0] % divisor_tensor 

4555 need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0)) 

4556 # pylint: disable=cell-var-from-loop 

4557 padded = tf_cond.cond(need_padding, 

4558 lambda: do_pad(flat_imgs, remainder), 

4559 lambda: flat_imgs) 

4560 # pylint: enable=cell-var-from-loop 

4561 

4562 downscaled = [ 

4563 nn_ops.avg_pool( 

4564 x, ksize=divisor, strides=divisor, padding='VALID') 

4565 for x in padded 

4566 ] 

4567 tails = [x[1:] for x in array_ops.shape_n(downscaled)] 

4568 imgs = [ 

4569 array_ops.reshape(x, array_ops.concat([h, t], 0)) 

4570 for x, h, t in zip(downscaled, heads, tails) 

4571 ] 

4572 

4573 # Overwrite previous ssim value since we only need the last one. 

4574 ssim_per_channel, cs = _ssim_per_channel( 

4575 *imgs, 

4576 max_val=max_val, 

4577 filter_size=filter_size, 

4578 filter_sigma=filter_sigma, 

4579 k1=k1, 

4580 k2=k2) 

4581 mcs.append(nn_ops.relu(cs)) 

4582 

4583 # Remove the cs score for the last scale. In the MS-SSIM calculation, 

4584 # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p). 

4585 mcs.pop() # Remove the cs score for the last scale. 

4586 mcs_and_ssim = array_ops_stack.stack( 

4587 mcs + [nn_ops.relu(ssim_per_channel)], axis=-1) 

4588 # Take weighted geometric mean across the scale axis. 

4589 ms_ssim = math_ops.reduce_prod( 

4590 math_ops.pow(mcs_and_ssim, power_factors), [-1]) 

4591 

4592 return math_ops.reduce_mean(ms_ssim, [-1]) # Avg over color channels. 

4593 

4594 

4595@tf_export('image.image_gradients') 

4596@dispatch.add_dispatch_support 

4597def image_gradients(image): 

4598 """Returns image gradients (dy, dx) for each color channel. 

4599 

4600 Both output tensors have the same shape as the input: [batch_size, h, w, 

4601 d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in 

4602 location (x, y). That means that dy will always have zeros in the last row, 

4603 and dx will always have zeros in the last column. 

4604 

4605 Usage Example: 

4606 ```python 

4607 BATCH_SIZE = 1 

4608 IMAGE_HEIGHT = 5 

4609 IMAGE_WIDTH = 5 

4610 CHANNELS = 1 

4611 image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS, 

4612 delta=1, dtype=tf.float32), 

4613 shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)) 

4614 dy, dx = tf.image.image_gradients(image) 

4615 print(image[0, :,:,0]) 

4616 tf.Tensor( 

4617 [[ 0. 1. 2. 3. 4.] 

4618 [ 5. 6. 7. 8. 9.] 

4619 [10. 11. 12. 13. 14.] 

4620 [15. 16. 17. 18. 19.] 

4621 [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32) 

4622 print(dy[0, :,:,0]) 

4623 tf.Tensor( 

4624 [[5. 5. 5. 5. 5.] 

4625 [5. 5. 5. 5. 5.] 

4626 [5. 5. 5. 5. 5.] 

4627 [5. 5. 5. 5. 5.] 

4628 [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32) 

4629 print(dx[0, :,:,0]) 

4630 tf.Tensor( 

4631 [[1. 1. 1. 1. 0.] 

4632 [1. 1. 1. 1. 0.] 

4633 [1. 1. 1. 1. 0.] 

4634 [1. 1. 1. 1. 0.] 

4635 [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32) 

4636 ``` 

4637 

4638 Args: 

4639 image: Tensor with shape [batch_size, h, w, d]. 

4640 

4641 Returns: 

4642 Pair of tensors (dy, dx) holding the vertical and horizontal image 

4643 gradients (1-step finite difference). 

4644 

4645 Raises: 

4646 ValueError: If `image` is not a 4D tensor. 

4647 """ 

4648 if image.get_shape().ndims != 4: 

4649 raise ValueError('image_gradients expects a 4D tensor ' 

4650 '[batch_size, h, w, d], not {}.'.format(image.get_shape())) 

4651 image_shape = array_ops.shape(image) 

4652 batch_size, height, width, depth = array_ops_stack.unstack(image_shape) 

4653 dy = image[:, 1:, :, :] - image[:, :-1, :, :] 

4654 dx = image[:, :, 1:, :] - image[:, :, :-1, :] 

4655 

4656 # Return tensors with same size as original image by concatenating 

4657 # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y). 

4658 shape = array_ops_stack.stack([batch_size, 1, width, depth]) 

4659 dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1) 

4660 dy = array_ops.reshape(dy, image_shape) 

4661 

4662 shape = array_ops_stack.stack([batch_size, height, 1, depth]) 

4663 dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2) 

4664 dx = array_ops.reshape(dx, image_shape) 

4665 

4666 return dy, dx 

4667 

4668 

4669@tf_export('image.sobel_edges') 

4670@dispatch.add_dispatch_support 

4671def sobel_edges(image): 

4672 """Returns a tensor holding Sobel edge maps. 

4673 

4674 Example usage: 

4675 

4676 For general usage, `image` would be loaded from a file as below: 

4677 

4678 ```python 

4679 image_bytes = tf.io.read_file(path_to_image_file) 

4680 image = tf.image.decode_image(image_bytes) 

4681 image = tf.cast(image, tf.float32) 

4682 image = tf.expand_dims(image, 0) 

4683 ``` 

4684 But for demo purposes, we are using randomly generated values for `image`: 

4685 

4686 >>> image = tf.random.uniform( 

4687 ... maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32) 

4688 >>> sobel = tf.image.sobel_edges(image) 

4689 >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction 

4690 >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction 

4691 

4692 For displaying the sobel results, PIL's [Image Module]( 

4693 https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used: 

4694 

4695 ```python 

4696 # Display edge maps for the first channel (at index 0) 

4697 Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show() 

4698 Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show() 

4699 ``` 

4700 

4701 Args: 

4702 image: Image tensor with shape [batch_size, h, w, d] and type float32 or 

4703 float64. The image(s) must be 2x2 or larger. 

4704 

4705 Returns: 

4706 Tensor holding edge maps for each channel. Returns a tensor with shape 

4707 [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]], 

4708 [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter. 

4709 """ 

4710 # Define vertical and horizontal Sobel filters. 

4711 static_image_shape = image.get_shape() 

4712 image_shape = array_ops.shape(image) 

4713 kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]], 

4714 [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]] 

4715 num_kernels = len(kernels) 

4716 kernels = np.transpose(np.asarray(kernels), (1, 2, 0)) 

4717 kernels = np.expand_dims(kernels, -2) 

4718 kernels_tf = constant_op.constant(kernels, dtype=image.dtype) 

4719 

4720 kernels_tf = array_ops.tile( 

4721 kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters') 

4722 

4723 # Use depth-wise convolution to calculate edge maps per channel. 

4724 pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]] 

4725 padded = array_ops.pad(image, pad_sizes, mode='REFLECT') 

4726 

4727 # Output tensor has shape [batch_size, h, w, d * num_kernels]. 

4728 strides = [1, 1, 1, 1] 

4729 output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID') 

4730 

4731 # Reshape to [batch_size, h, w, d, num_kernels]. 

4732 shape = array_ops.concat([image_shape, [num_kernels]], 0) 

4733 output = array_ops.reshape(output, shape=shape) 

4734 output.set_shape(static_image_shape.concatenate([num_kernels])) 

4735 return output 

4736 

4737 

4738def resize_bicubic(images, 

4739 size, 

4740 align_corners=False, 

4741 name=None, 

4742 half_pixel_centers=False): 

4743 return gen_image_ops.resize_bicubic( 

4744 images=images, 

4745 size=size, 

4746 align_corners=align_corners, 

4747 half_pixel_centers=half_pixel_centers, 

4748 name=name) 

4749 

4750 

4751def resize_bilinear(images, 

4752 size, 

4753 align_corners=False, 

4754 name=None, 

4755 half_pixel_centers=False): 

4756 return gen_image_ops.resize_bilinear( 

4757 images=images, 

4758 size=size, 

4759 align_corners=align_corners, 

4760 half_pixel_centers=half_pixel_centers, 

4761 name=name) 

4762 

4763 

4764def resize_nearest_neighbor(images, 

4765 size, 

4766 align_corners=False, 

4767 name=None, 

4768 half_pixel_centers=False): 

4769 return gen_image_ops.resize_nearest_neighbor( 

4770 images=images, 

4771 size=size, 

4772 align_corners=align_corners, 

4773 half_pixel_centers=half_pixel_centers, 

4774 name=name) 

4775 

4776 

4777resize_area_deprecation = deprecation.deprecated( 

4778 date=None, 

4779 instructions=( 

4780 'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.')) 

4781tf_export(v1=['image.resize_area'])( 

4782 resize_area_deprecation( 

4783 dispatch.add_dispatch_support(gen_image_ops.resize_area))) 

4784 

4785resize_bicubic_deprecation = deprecation.deprecated( 

4786 date=None, 

4787 instructions=( 

4788 'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.')) 

4789tf_export(v1=['image.resize_bicubic'])( 

4790 dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic))) 

4791 

4792resize_bilinear_deprecation = deprecation.deprecated( 

4793 date=None, 

4794 instructions=( 

4795 'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.')) 

4796tf_export(v1=['image.resize_bilinear'])( 

4797 dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear))) 

4798 

4799resize_nearest_neighbor_deprecation = deprecation.deprecated( 

4800 date=None, 

4801 instructions=( 

4802 'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` ' 

4803 'instead.')) 

4804tf_export(v1=['image.resize_nearest_neighbor'])( 

4805 dispatch.add_dispatch_support( 

4806 resize_nearest_neighbor_deprecation(resize_nearest_neighbor))) 

4807 

4808 

4809@tf_export('image.crop_and_resize', v1=[]) 

4810@dispatch.add_dispatch_support 

4811def crop_and_resize_v2(image, 

4812 boxes, 

4813 box_indices, 

4814 crop_size, 

4815 method='bilinear', 

4816 extrapolation_value=.0, 

4817 name=None): 

4818 """Extracts crops from the input image tensor and resizes them. 

4819 

4820 Extracts crops from the input image tensor and resizes them using bilinear 

4821 sampling or nearest neighbor sampling (possibly with aspect ratio change) to a 

4822 common output size specified by `crop_size`. This is more general than the 

4823 `crop_to_bounding_box` op which extracts a fixed size slice from the input 

4824 image and does not allow resizing or aspect ratio change. The crops occur 

4825 first and then the resize. 

4826 

4827 Returns a tensor with `crops` from the input `image` at positions defined at 

4828 the bounding box locations in `boxes`. The cropped boxes are all resized (with 

4829 bilinear or nearest neighbor interpolation) to a fixed 

4830 `size = [crop_height, crop_width]`. The result is a 4-D tensor 

4831 `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned. 

4832 In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical 

4833 results to using `tf.compat.v1.image.resize_bilinear()` or 

4834 `tf.compat.v1.image.resize_nearest_neighbor()`(depends on the `method` 

4835 argument) with 

4836 `align_corners=True`. 

4837 

4838 Args: 

4839 image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. 

4840 Both `image_height` and `image_width` need to be positive. 

4841 boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor 

4842 specifies the coordinates of a box in the `box_ind[i]` image and is 

4843 specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized 

4844 coordinate value of `y` is mapped to the image coordinate at `y * 

4845 (image_height - 1)`, so as the `[0, 1]` interval of normalized image 

4846 height is mapped to `[0, image_height - 1]` in image height coordinates. 

4847 We do allow `y1` > `y2`, in which case the sampled crop is an up-down 

4848 flipped version of the original image. The width dimension is treated 

4849 similarly. Normalized coordinates outside the `[0, 1]` range are allowed, 

4850 in which case we use `extrapolation_value` to extrapolate the input image 

4851 values. 

4852 box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, 

4853 batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box 

4854 refers to. 

4855 crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. 

4856 All cropped image patches are resized to this size. The aspect ratio of 

4857 the image content is not preserved. Both `crop_height` and `crop_width` 

4858 need to be positive. 

4859 method: An optional string specifying the sampling method for resizing. It 

4860 can be either `"bilinear"` or `"nearest"` and default to `"bilinear"`. 

4861 Currently two sampling methods are supported: Bilinear and Nearest 

4862 Neighbor. 

4863 extrapolation_value: An optional `float`. Defaults to `0.0`. Value used for 

4864 extrapolation, when applicable. 

4865 name: A name for the operation (optional). 

4866 

4867 Returns: 

4868 A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. 

4869 

4870 Usage example: 

4871 

4872 >>> BATCH_SIZE = 1 

4873 >>> NUM_BOXES = 5 

4874 >>> IMAGE_HEIGHT = 256 

4875 >>> IMAGE_WIDTH = 256 

4876 >>> CHANNELS = 3 

4877 >>> CROP_SIZE = (24, 24) 

4878 

4879 >>> image = tf.random.normal(shape=( 

4880 ... BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS) ) 

4881 >>> boxes = tf.random.uniform(shape=(NUM_BOXES, 4)) 

4882 >>> box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0, 

4883 ... maxval=BATCH_SIZE, dtype=tf.int32) 

4884 >>> output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE) 

4885 >>> output.shape 

4886 TensorShape([5, 24, 24, 3]) 

4887 

4888 Example with linear interpolation: 

4889 

4890 >>> image = np.arange(0, 18, 2).astype('float32').reshape(3, 3) 

4891 >>> result = tf.image.crop_and_resize( 

4892 ... image[None, :, :, None], 

4893 ... np.asarray([[0.5,0.5,1,1]]), [0], [3, 3], method='bilinear') 

4894 >>> result[0][:, :, 0] 

4895 <tf.Tensor: shape=(3, 3), dtype=float32, numpy= 

4896 array([[ 8., 9., 10.], 

4897 [11., 12., 13.], 

4898 [14., 15., 16.]], dtype=float32)> 

4899 

4900 Example with nearest interpolation: 

4901 

4902 >>> image = np.arange(0, 18, 2).astype('float32').reshape(3, 3) 

4903 >>> result = tf.image.crop_and_resize( 

4904 ... image[None, :, :, None], 

4905 ... np.asarray([[0.5,0.5,1,1]]), [0], [3, 3], method='nearest') 

4906 >>> result[0][:, :, 0] 

4907 <tf.Tensor: shape=(3, 3), dtype=float32, numpy= 

4908 array([[ 8., 10., 10.], 

4909 [14., 16., 16.], 

4910 [14., 16., 16.]], dtype=float32)> 

4911 

4912 

4913 """ 

4914 return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size, 

4915 method, extrapolation_value, name) 

4916 

4917 

4918@tf_export(v1=['image.crop_and_resize']) 

4919@dispatch.add_dispatch_support 

4920@deprecation.deprecated_args(None, 

4921 'box_ind is deprecated, use box_indices instead', 

4922 'box_ind') 

4923def crop_and_resize_v1( # pylint: disable=missing-docstring 

4924 image, 

4925 boxes, 

4926 box_ind=None, 

4927 crop_size=None, 

4928 method='bilinear', 

4929 extrapolation_value=0, 

4930 name=None, 

4931 box_indices=None): 

4932 box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices, 

4933 'box_ind', box_ind) 

4934 return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method, 

4935 extrapolation_value, name) 

4936 

4937 

4938crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__ 

4939 

4940 

4941@tf_export(v1=['image.extract_glimpse']) 

4942@dispatch.add_dispatch_support 

4943def extract_glimpse( 

4944 input, # pylint: disable=redefined-builtin 

4945 size, 

4946 offsets, 

4947 centered=True, 

4948 normalized=True, 

4949 uniform_noise=True, 

4950 name=None): 

4951 """Extracts a glimpse from the input tensor. 

4952 

4953 Returns a set of windows called glimpses extracted at location 

4954 `offsets` from the input tensor. If the windows only partially 

4955 overlaps the inputs, the non-overlapping areas will be filled with 

4956 random noise. 

4957 

4958 The result is a 4-D tensor of shape `[batch_size, glimpse_height, 

4959 glimpse_width, channels]`. The channels and batch dimensions are the 

4960 same as that of the input tensor. The height and width of the output 

4961 windows are specified in the `size` parameter. 

4962 

4963 The argument `normalized` and `centered` controls how the windows are built: 

4964 

4965 * If the coordinates are normalized but not centered, 0.0 and 1.0 

4966 correspond to the minimum and maximum of each height and width 

4967 dimension. 

4968 * If the coordinates are both normalized and centered, they range from 

4969 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper 

4970 left corner, the lower right corner is located at (1.0, 1.0) and the 

4971 center is at (0, 0). 

4972 * If the coordinates are not normalized they are interpreted as 

4973 numbers of pixels. 

4974 

4975 Usage Example: 

4976 

4977 >>> x = [[[[0.0], 

4978 ... [1.0], 

4979 ... [2.0]], 

4980 ... [[3.0], 

4981 ... [4.0], 

4982 ... [5.0]], 

4983 ... [[6.0], 

4984 ... [7.0], 

4985 ... [8.0]]]] 

4986 >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]], 

4987 ... centered=False, normalized=False) 

4988 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy= 

4989 array([[[[0.], 

4990 [1.]], 

4991 [[3.], 

4992 [4.]]]], dtype=float32)> 

4993 

4994 Args: 

4995 input: A `Tensor` of type `float32`. A 4-D float tensor of shape 

4996 `[batch_size, height, width, channels]`. 

4997 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the 

4998 size of the glimpses to extract. The glimpse height must be specified 

4999 first, following by the glimpse width. 

5000 offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape 

5001 `[batch_size, 2]` containing the y, x locations of the center of each 

5002 window. 

5003 centered: An optional `bool`. Defaults to `True`. indicates if the offset 

5004 coordinates are centered relative to the image, in which case the (0, 0) 

5005 offset is relative to the center of the input images. If false, the (0,0) 

5006 offset corresponds to the upper left corner of the input images. 

5007 normalized: An optional `bool`. Defaults to `True`. indicates if the offset 

5008 coordinates are normalized. 

5009 uniform_noise: An optional `bool`. Defaults to `True`. indicates if the 

5010 noise should be generated using a uniform distribution or a Gaussian 

5011 distribution. 

5012 name: A name for the operation (optional). 

5013 

5014 Returns: 

5015 A `Tensor` of type `float32`. 

5016 """ 

5017 return gen_image_ops.extract_glimpse( 

5018 input=input, 

5019 size=size, 

5020 offsets=offsets, 

5021 centered=centered, 

5022 normalized=normalized, 

5023 uniform_noise=uniform_noise, 

5024 name=name) 

5025 

5026 

5027@tf_export('image.extract_glimpse', v1=[]) 

5028@dispatch.add_dispatch_support 

5029def extract_glimpse_v2( 

5030 input, # pylint: disable=redefined-builtin 

5031 size, 

5032 offsets, 

5033 centered=True, 

5034 normalized=True, 

5035 noise='uniform', 

5036 name=None): 

5037 """Extracts a glimpse from the input tensor. 

5038 

5039 Returns a set of windows called glimpses extracted at location 

5040 `offsets` from the input tensor. If the windows only partially 

5041 overlaps the inputs, the non-overlapping areas will be filled with 

5042 random noise. 

5043 

5044 The result is a 4-D tensor of shape `[batch_size, glimpse_height, 

5045 glimpse_width, channels]`. The channels and batch dimensions are the 

5046 same as that of the input tensor. The height and width of the output 

5047 windows are specified in the `size` parameter. 

5048 

5049 The argument `normalized` and `centered` controls how the windows are built: 

5050 

5051 * If the coordinates are normalized but not centered, 0.0 and 1.0 

5052 correspond to the minimum and maximum of each height and width 

5053 dimension. 

5054 * If the coordinates are both normalized and centered, they range from 

5055 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper 

5056 left corner, the lower right corner is located at (1.0, 1.0) and the 

5057 center is at (0, 0). 

5058 * If the coordinates are not normalized they are interpreted as 

5059 numbers of pixels. 

5060 

5061 Usage Example: 

5062 

5063 >>> x = [[[[0.0], 

5064 ... [1.0], 

5065 ... [2.0]], 

5066 ... [[3.0], 

5067 ... [4.0], 

5068 ... [5.0]], 

5069 ... [[6.0], 

5070 ... [7.0], 

5071 ... [8.0]]]] 

5072 >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]], 

5073 ... centered=False, normalized=False) 

5074 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy= 

5075 array([[[[4.], 

5076 [5.]], 

5077 [[7.], 

5078 [8.]]]], dtype=float32)> 

5079 

5080 Args: 

5081 input: A `Tensor` of type `float32`. A 4-D float tensor of shape 

5082 `[batch_size, height, width, channels]`. 

5083 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the 

5084 size of the glimpses to extract. The glimpse height must be specified 

5085 first, following by the glimpse width. 

5086 offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape 

5087 `[batch_size, 2]` containing the y, x locations of the center of each 

5088 window. 

5089 centered: An optional `bool`. Defaults to `True`. indicates if the offset 

5090 coordinates are centered relative to the image, in which case the (0, 0) 

5091 offset is relative to the center of the input images. If false, the (0,0) 

5092 offset corresponds to the upper left corner of the input images. 

5093 normalized: An optional `bool`. Defaults to `True`. indicates if the offset 

5094 coordinates are normalized. 

5095 noise: An optional `string`. Defaults to `uniform`. indicates if the noise 

5096 should be `uniform` (uniform distribution), `gaussian` (gaussian 

5097 distribution), or `zero` (zero padding). 

5098 name: A name for the operation (optional). 

5099 

5100 Returns: 

5101 A `Tensor` of type `float32`. 

5102 """ 

5103 return gen_image_ops.extract_glimpse_v2( 

5104 input=input, 

5105 size=size, 

5106 offsets=offsets, 

5107 centered=centered, 

5108 normalized=normalized, 

5109 noise=noise, 

5110 uniform_noise=False, 

5111 name=name) 

5112 

5113 

5114@tf_export('image.combined_non_max_suppression') 

5115@dispatch.add_dispatch_support 

5116def combined_non_max_suppression(boxes, 

5117 scores, 

5118 max_output_size_per_class, 

5119 max_total_size, 

5120 iou_threshold=0.5, 

5121 score_threshold=float('-inf'), 

5122 pad_per_class=False, 

5123 clip_boxes=True, 

5124 name=None): 

5125 """Greedily selects a subset of bounding boxes in descending order of score. 

5126 

5127 This operation performs non_max_suppression on the inputs per batch, across 

5128 all classes. 

5129 Prunes away boxes that have high intersection-over-union (IOU) overlap 

5130 with previously selected boxes. Bounding boxes are supplied as 

5131 [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any 

5132 diagonal pair of box corners and the coordinates can be provided as normalized 

5133 (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm 

5134 is agnostic to where the origin is in the coordinate system. Also note that 

5135 this algorithm is invariant to orthogonal transformations and translations 

5136 of the coordinate system; thus translating or reflections of the coordinate 

5137 system result in the same boxes being selected by the algorithm. 

5138 The output of this operation is the final boxes, scores and classes tensor 

5139 returned after performing non_max_suppression. 

5140 

5141 Args: 

5142 boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q` 

5143 is 1 then same boxes are used for all classes otherwise, if `q` is equal 

5144 to number of classes, class-specific boxes are used. 

5145 scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]` 

5146 representing a single score corresponding to each box (each row of boxes). 

5147 max_output_size_per_class: A scalar integer `Tensor` representing the 

5148 maximum number of boxes to be selected by non-max suppression per class 

5149 max_total_size: A int32 scalar representing maximum number of boxes retained 

5150 over all classes. Note that setting this value to a large number may 

5151 result in OOM error depending on the system workload. 

5152 iou_threshold: A float representing the threshold for deciding whether boxes 

5153 overlap too much with respect to IOU. 

5154 score_threshold: A float representing the threshold for deciding when to 

5155 remove boxes based on score. 

5156 pad_per_class: If false, the output nmsed boxes, scores and classes are 

5157 padded/clipped to `max_total_size`. If true, the output nmsed boxes, 

5158 scores and classes are padded to be of length 

5159 `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in 

5160 which case it is clipped to `max_total_size`. Defaults to false. 

5161 clip_boxes: If true, the coordinates of output nmsed boxes will be clipped 

5162 to [0, 1]. If false, output the box coordinates as it is. Defaults to 

5163 true. 

5164 name: A name for the operation (optional). 

5165 

5166 Returns: 

5167 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor 

5168 containing the non-max suppressed boxes. 

5169 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing 

5170 the scores for the boxes. 

5171 'nmsed_classes': A [batch_size, max_detections] float32 tensor 

5172 containing the class for boxes. 

5173 'valid_detections': A [batch_size] int32 tensor indicating the number of 

5174 valid detections per batch item. Only the top valid_detections[i] entries 

5175 in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the 

5176 entries are zero paddings. 

5177 """ 

5178 with ops.name_scope(name, 'combined_non_max_suppression'): 

5179 iou_threshold = ops.convert_to_tensor( 

5180 iou_threshold, dtype=dtypes.float32, name='iou_threshold') 

5181 score_threshold = ops.convert_to_tensor( 

5182 score_threshold, dtype=dtypes.float32, name='score_threshold') 

5183 

5184 # Convert `max_total_size` to tensor *without* setting the `dtype` param. 

5185 # This allows us to catch `int32` overflow case with `max_total_size` 

5186 # whose expected dtype is `int32` by the op registration. Any number within 

5187 # `int32` will get converted to `int32` tensor. Anything larger will get 

5188 # converted to `int64`. Passing in `int64` for `max_total_size` to the op 

5189 # will throw dtype mismatch exception. 

5190 # TODO(b/173251596): Once there is a more general solution to warn against 

5191 # int overflow conversions, revisit this check. 

5192 max_total_size = ops.convert_to_tensor(max_total_size) 

5193 

5194 return gen_image_ops.combined_non_max_suppression( 

5195 boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, 

5196 score_threshold, pad_per_class, clip_boxes) 

5197 

5198 

5199def _bbox_overlap(boxes_a, boxes_b): 

5200 """Calculates the overlap (iou - intersection over union) between boxes_a and boxes_b. 

5201 

5202 Args: 

5203 boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of 

5204 boxes per image. The last dimension is the pixel coordinates in 

5205 [ymin, xmin, ymax, xmax] form. 

5206 boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of 

5207 boxes. The last dimension is the pixel coordinates in 

5208 [ymin, xmin, ymax, xmax] form. 

5209 Returns: 

5210 intersection_over_union: a tensor with as a shape of [batch_size, N, M], 

5211 representing the ratio of intersection area over union area (IoU) between 

5212 two boxes 

5213 """ 

5214 with ops.name_scope('bbox_overlap'): 

5215 a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split( 

5216 value=boxes_a, num_or_size_splits=4, axis=2) 

5217 b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split( 

5218 value=boxes_b, num_or_size_splits=4, axis=2) 

5219 

5220 # Calculates the intersection area. 

5221 i_xmin = math_ops.maximum( 

5222 a_x_min, array_ops.transpose(b_x_min, [0, 2, 1])) 

5223 i_xmax = math_ops.minimum( 

5224 a_x_max, array_ops.transpose(b_x_max, [0, 2, 1])) 

5225 i_ymin = math_ops.maximum( 

5226 a_y_min, array_ops.transpose(b_y_min, [0, 2, 1])) 

5227 i_ymax = math_ops.minimum( 

5228 a_y_max, array_ops.transpose(b_y_max, [0, 2, 1])) 

5229 i_area = math_ops.maximum( 

5230 (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0) 

5231 

5232 # Calculates the union area. 

5233 a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min) 

5234 b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min) 

5235 EPSILON = 1e-8 

5236 # Adds a small epsilon to avoid divide-by-zero. 

5237 u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON 

5238 

5239 # Calculates IoU. 

5240 intersection_over_union = i_area / u_area 

5241 

5242 return intersection_over_union 

5243 

5244 

5245def _self_suppression(iou, _, iou_sum, iou_threshold): 

5246 """Suppress boxes in the same tile. 

5247 

5248 Compute boxes that cannot be suppressed by others (i.e., 

5249 can_suppress_others), and then use them to suppress boxes in the same tile. 

5250 

5251 Args: 

5252 iou: a tensor of shape [batch_size, num_boxes_with_padding] representing 

5253 intersection over union. 

5254 iou_sum: a scalar tensor. 

5255 iou_threshold: a scalar tensor. 

5256 

5257 Returns: 

5258 iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding]. 

5259 iou_diff: a scalar tensor representing whether any box is supressed in 

5260 this step. 

5261 iou_sum_new: a scalar tensor of shape [batch_size] that represents 

5262 the iou sum after suppression. 

5263 iou_threshold: a scalar tensor. 

5264 """ 

5265 batch_size = array_ops.shape(iou)[0] 

5266 can_suppress_others = math_ops.cast( 

5267 array_ops.reshape( 

5268 math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]), 

5269 iou.dtype) 

5270 iou_after_suppression = array_ops.reshape( 

5271 math_ops.cast( 

5272 math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold, 

5273 iou.dtype), 

5274 [batch_size, -1, 1]) * iou 

5275 iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2]) 

5276 return [ 

5277 iou_after_suppression, 

5278 math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new, 

5279 iou_threshold 

5280 ] 

5281 

5282 

5283def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size): 

5284 """Suppress boxes between different tiles. 

5285 

5286 Args: 

5287 boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4] 

5288 box_slice: a tensor of shape [batch_size, tile_size, 4] 

5289 iou_threshold: a scalar tensor 

5290 inner_idx: a scalar tensor representing the tile index of the tile 

5291 that is used to supress box_slice 

5292 tile_size: an integer representing the number of boxes in a tile 

5293 

5294 Returns: 

5295 boxes: unchanged boxes as input 

5296 box_slice_after_suppression: box_slice after suppression 

5297 iou_threshold: unchanged 

5298 """ 

5299 batch_size = array_ops.shape(boxes)[0] 

5300 new_slice = array_ops.slice( 

5301 boxes, [0, inner_idx * tile_size, 0], 

5302 [batch_size, tile_size, 4]) 

5303 iou = _bbox_overlap(new_slice, box_slice) 

5304 box_slice_after_suppression = array_ops.expand_dims( 

5305 math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]), 

5306 box_slice.dtype), 

5307 2) * box_slice 

5308 return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1 

5309 

5310 

5311def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size): 

5312 """Process boxes in the range [idx*tile_size, (idx+1)*tile_size). 

5313 

5314 Args: 

5315 boxes: a tensor with a shape of [batch_size, anchors, 4]. 

5316 iou_threshold: a float representing the threshold for deciding whether boxes 

5317 overlap too much with respect to IOU. 

5318 output_size: an int32 tensor of size [batch_size]. Representing the number 

5319 of selected boxes for each batch. 

5320 idx: an integer scalar representing induction variable. 

5321 tile_size: an integer representing the number of boxes in a tile 

5322 

5323 Returns: 

5324 boxes: updated boxes. 

5325 iou_threshold: pass down iou_threshold to the next iteration. 

5326 output_size: the updated output_size. 

5327 idx: the updated induction variable. 

5328 """ 

5329 with ops.name_scope('suppression_loop_body'): 

5330 num_tiles = array_ops.shape(boxes)[1] // tile_size 

5331 batch_size = array_ops.shape(boxes)[0] 

5332 

5333 def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx): 

5334 return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, 

5335 tile_size) 

5336 

5337 # Iterates over tiles that can possibly suppress the current tile. 

5338 box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0], 

5339 [batch_size, tile_size, 4]) 

5340 _, box_slice, _, _ = while_loop.while_loop( 

5341 lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx, 

5342 cross_suppression_func, 

5343 [boxes, box_slice, iou_threshold, 

5344 constant_op.constant(0)]) 

5345 

5346 # Iterates over the current tile to compute self-suppression. 

5347 iou = _bbox_overlap(box_slice, box_slice) 

5348 mask = array_ops.expand_dims( 

5349 array_ops.reshape( 

5350 math_ops.range(tile_size), [1, -1]) > array_ops.reshape( 

5351 math_ops.range(tile_size), [-1, 1]), 0) 

5352 iou *= math_ops.cast( 

5353 math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype) 

5354 suppressed_iou, _, _, _ = while_loop.while_loop( 

5355 lambda _iou, loop_condition, _iou_sum, _: loop_condition, 

5356 _self_suppression, [ 

5357 iou, 

5358 constant_op.constant(True), 

5359 math_ops.reduce_sum(iou, [1, 2]), iou_threshold 

5360 ]) 

5361 suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0 

5362 box_slice *= array_ops.expand_dims( 

5363 1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2) 

5364 

5365 # Uses box_slice to update the input boxes. 

5366 mask = array_ops.reshape( 

5367 math_ops.cast( 

5368 math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype), 

5369 [1, -1, 1, 1]) 

5370 boxes = array_ops.tile(array_ops.expand_dims( 

5371 box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape( 

5372 boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask) 

5373 boxes = array_ops.reshape(boxes, [batch_size, -1, 4]) 

5374 

5375 # Updates output_size. 

5376 output_size += math_ops.reduce_sum( 

5377 math_ops.cast( 

5378 math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1]) 

5379 return boxes, iou_threshold, output_size, idx + 1 

5380 

5381 

5382@tf_export('image.non_max_suppression_padded') 

5383@dispatch.add_dispatch_support 

5384def non_max_suppression_padded(boxes, 

5385 scores, 

5386 max_output_size, 

5387 iou_threshold=0.5, 

5388 score_threshold=float('-inf'), 

5389 pad_to_max_output_size=False, 

5390 name=None, 

5391 sorted_input=False, 

5392 canonicalized_coordinates=False, 

5393 tile_size=512): 

5394 """Greedily selects a subset of bounding boxes in descending order of score. 

5395 

5396 Performs algorithmically equivalent operation to tf.image.non_max_suppression, 

5397 with the addition of an optional parameter which zero-pads the output to 

5398 be of size `max_output_size`. 

5399 The output of this operation is a tuple containing the set of integers 

5400 indexing into the input collection of bounding boxes representing the selected 

5401 boxes and the number of valid indices in the index set. The bounding box 

5402 coordinates corresponding to the selected indices can then be obtained using 

5403 the `tf.slice` and `tf.gather` operations. For example: 

5404 ```python 

5405 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded( 

5406 boxes, scores, max_output_size, iou_threshold, 

5407 score_threshold, pad_to_max_output_size=True) 

5408 selected_indices = tf.slice( 

5409 selected_indices_padded, tf.constant([0]), num_valid) 

5410 selected_boxes = tf.gather(boxes, selected_indices) 

5411 ``` 

5412 

5413 Args: 

5414 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4]. 

5415 Dimensions except the last two are batch dimensions. 

5416 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes]. 

5417 max_output_size: a scalar integer `Tensor` representing the maximum number 

5418 of boxes to be selected by non max suppression. Note that setting this 

5419 value to a large number may result in OOM error depending on the system 

5420 workload. 

5421 iou_threshold: a float representing the threshold for deciding whether boxes 

5422 overlap too much with respect to IoU (intersection over union). 

5423 score_threshold: a float representing the threshold for box scores. Boxes 

5424 with a score that is not larger than this threshold will be suppressed. 

5425 pad_to_max_output_size: whether to pad the output idx to max_output_size. 

5426 Must be set to True when the input is a batch of images. 

5427 name: name of operation. 

5428 sorted_input: a boolean indicating whether the input boxes and scores 

5429 are sorted in descending order by the score. 

5430 canonicalized_coordinates: if box coordinates are given as 

5431 `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant 

5432 computation to canonicalize box coordinates. 

5433 tile_size: an integer representing the number of boxes in a tile, i.e., 

5434 the maximum number of boxes per image that can be used to suppress other 

5435 boxes in parallel; larger tile_size means larger parallelism and 

5436 potentially more redundant work. 

5437 Returns: 

5438 idx: a tensor with a shape of [..., num_boxes] representing the 

5439 indices selected by non-max suppression. The leading dimensions 

5440 are the batch dimensions of the input boxes. All numbers are within 

5441 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i] 

5442 indices (i.e., idx[i][:num_valid[i]]) are valid. 

5443 num_valid: a tensor of rank 0 or higher with a shape of [...] 

5444 representing the number of valid indices in idx. Its dimensions are the 

5445 batch dimensions of the input boxes. 

5446 Raises: 

5447 ValueError: When set pad_to_max_output_size to False for batched input. 

5448 """ 

5449 with ops.name_scope(name, 'non_max_suppression_padded'): 

5450 if not pad_to_max_output_size: 

5451 # pad_to_max_output_size may be set to False only when the shape of 

5452 # boxes is [num_boxes, 4], i.e., a single image. We make best effort to 

5453 # detect violations at compile time. If `boxes` does not have a static 

5454 # rank, the check allows computation to proceed. 

5455 if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2: 

5456 raise ValueError("'pad_to_max_output_size' (value {}) must be True for " 

5457 'batched input'.format(pad_to_max_output_size)) 

5458 if name is None: 

5459 name = '' 

5460 idx, num_valid = non_max_suppression_padded_v2( 

5461 boxes, scores, max_output_size, iou_threshold, score_threshold, 

5462 sorted_input, canonicalized_coordinates, tile_size) 

5463 # def_function.function seems to lose shape information, so set it here. 

5464 if not pad_to_max_output_size: 

5465 idx = idx[0, :num_valid] 

5466 else: 

5467 batch_dims = array_ops.concat([ 

5468 array_ops.shape(boxes)[:-2], 

5469 array_ops.expand_dims(max_output_size, 0) 

5470 ], 0) 

5471 idx = array_ops.reshape(idx, batch_dims) 

5472 return idx, num_valid 

5473 

5474 

5475# TODO(b/158709815): Improve performance regression due to 

5476# def_function.function. 

5477@def_function.function( 

5478 experimental_implements='non_max_suppression_padded_v2') 

5479def non_max_suppression_padded_v2(boxes, 

5480 scores, 

5481 max_output_size, 

5482 iou_threshold=0.5, 

5483 score_threshold=float('-inf'), 

5484 sorted_input=False, 

5485 canonicalized_coordinates=False, 

5486 tile_size=512): 

5487 """Non-maximum suppression. 

5488 

5489 Prunes away boxes that have high intersection-over-union (IOU) overlap 

5490 with previously selected boxes. Bounding boxes are supplied as 

5491 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 

5492 diagonal pair of box corners and the coordinates can be provided as normalized 

5493 (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box 

5494 coordinates are cannonicalized to `[y_min, x_min, y_max, x_max]`, 

5495 where `(y_min, x_min)` and `(y_max, x_mas)` are the coordinates of the lower 

5496 left and upper right corner. User may indiciate the input box coordinates are 

5497 already canonicalized to eliminate redundant work by setting 

5498 canonicalized_coordinates to `True`. Note that this algorithm is agnostic to 

5499 where the origin is in the coordinate system. Note that this algorithm is 

5500 invariant to orthogonal transformations and translations of the coordinate 

5501 system; thus translating or reflections of the coordinate system result in the 

5502 same boxes being selected by the algorithm. 

5503 

5504 Similar to tf.image.non_max_suppression, non_max_suppression_padded 

5505 implements hard NMS but can operate on a batch of images and improves 

5506 performance by titling the bounding boxes. Non_max_suppression_padded should 

5507 be preferred over tf.image_non_max_suppression when running on devices with 

5508 abundant parallelsim for higher computation speed. For soft NMS, refer to 

5509 tf.image.non_max_suppression_with_scores. 

5510 

5511 While a serial NMS algorithm iteratively uses the highest-scored unprocessed 

5512 box to suppress boxes, this algorithm uses many boxes to suppress other boxes 

5513 in parallel. The key idea is to partition boxes into tiles based on their 

5514 score and suppresses boxes tile by tile, thus achieving parallelism within a 

5515 tile. The tile size determines the degree of parallelism. 

5516 

5517 In cross suppression (using boxes of tile A to suppress boxes of tile B), 

5518 all boxes in A can independently suppress boxes in B. 

5519 

5520 Self suppression (suppressing boxes of the same tile) needs to be iteratively 

5521 applied until there's no more suppression. In each iteration, boxes that 

5522 cannot be suppressed are used to suppress boxes in the same tile. 

5523 

5524 boxes = boxes.pad_to_multiply_of(tile_size) 

5525 num_tiles = len(boxes) // tile_size 

5526 output_boxes = [] 

5527 for i in range(num_tiles): 

5528 box_tile = boxes[i*tile_size : (i+1)*tile_size] 

5529 for j in range(i - 1): 

5530 # in parallel suppress boxes in box_tile using boxes from suppressing_tile 

5531 suppressing_tile = boxes[j*tile_size : (j+1)*tile_size] 

5532 iou = _bbox_overlap(box_tile, suppressing_tile) 

5533 # if the box is suppressed in iou, clear it to a dot 

5534 box_tile *= _update_boxes(iou) 

5535 # Iteratively handle the diagnal tile. 

5536 iou = _box_overlap(box_tile, box_tile) 

5537 iou_changed = True 

5538 while iou_changed: 

5539 # boxes that are not suppressed by anything else 

5540 suppressing_boxes = _get_suppressing_boxes(iou) 

5541 # boxes that are suppressed by suppressing_boxes 

5542 suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes) 

5543 # clear iou to 0 for boxes that are suppressed, as they cannot be used 

5544 # to suppress other boxes any more 

5545 new_iou = _clear_iou(iou, suppressed_boxes) 

5546 iou_changed = (new_iou != iou) 

5547 iou = new_iou 

5548 # remaining boxes that can still suppress others, are selected boxes. 

5549 output_boxes.append(_get_suppressing_boxes(iou)) 

5550 if len(output_boxes) >= max_output_size: 

5551 break 

5552 

5553 Args: 

5554 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4]. 

5555 Dimensions except the last two are batch dimensions. The last dimension 

5556 represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates 

5557 on each dimension can be given in any order 

5558 (see also `canonicalized_coordinates`) but must describe a box with 

5559 a positive area. 

5560 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes]. 

5561 max_output_size: a scalar integer `Tensor` representing the maximum number 

5562 of boxes to be selected by non max suppression. 

5563 iou_threshold: a float representing the threshold for deciding whether boxes 

5564 overlap too much with respect to IoU (intersection over union). 

5565 score_threshold: a float representing the threshold for box scores. Boxes 

5566 with a score that is not larger than this threshold will be suppressed. 

5567 sorted_input: a boolean indicating whether the input boxes and scores 

5568 are sorted in descending order by the score. 

5569 canonicalized_coordinates: if box coordinates are given as 

5570 `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant 

5571 computation to canonicalize box coordinates. 

5572 tile_size: an integer representing the number of boxes in a tile, i.e., 

5573 the maximum number of boxes per image that can be used to suppress other 

5574 boxes in parallel; larger tile_size means larger parallelism and 

5575 potentially more redundant work. 

5576 Returns: 

5577 idx: a tensor with a shape of [..., num_boxes] representing the 

5578 indices selected by non-max suppression. The leading dimensions 

5579 are the batch dimensions of the input boxes. All numbers are within 

5580 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i] 

5581 indices (i.e., idx[i][:num_valid[i]]) are valid. 

5582 num_valid: a tensor of rank 0 or higher with a shape of [...] 

5583 representing the number of valid indices in idx. Its dimensions are the 

5584 batch dimensions of the input boxes. 

5585 Raises: 

5586 ValueError: When set pad_to_max_output_size to False for batched input. 

5587 """ 

5588 def _sort_scores_and_boxes(scores, boxes): 

5589 """Sort boxes based their score from highest to lowest. 

5590 

5591 Args: 

5592 scores: a tensor with a shape of [batch_size, num_boxes] representing 

5593 the scores of boxes. 

5594 boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing 

5595 the boxes. 

5596 Returns: 

5597 sorted_scores: a tensor with a shape of [batch_size, num_boxes] 

5598 representing the sorted scores. 

5599 sorted_boxes: a tensor representing the sorted boxes. 

5600 sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes] 

5601 representing the index of the scores in a sorted descending order. 

5602 """ 

5603 with ops.name_scope('sort_scores_and_boxes'): 

5604 sorted_scores_indices = sort_ops.argsort( 

5605 scores, axis=1, direction='DESCENDING') 

5606 sorted_scores = array_ops.gather( 

5607 scores, sorted_scores_indices, axis=1, batch_dims=1 

5608 ) 

5609 sorted_boxes = array_ops.gather( 

5610 boxes, sorted_scores_indices, axis=1, batch_dims=1 

5611 ) 

5612 return sorted_scores, sorted_boxes, sorted_scores_indices 

5613 

5614 batch_dims = array_ops.shape(boxes)[:-2] 

5615 num_boxes = array_ops.shape(boxes)[-2] 

5616 boxes = array_ops.reshape(boxes, [-1, num_boxes, 4]) 

5617 scores = array_ops.reshape(scores, [-1, num_boxes]) 

5618 batch_size = array_ops.shape(boxes)[0] 

5619 if score_threshold != float('-inf'): 

5620 with ops.name_scope('filter_by_score'): 

5621 score_mask = math_ops.cast(scores > score_threshold, scores.dtype) 

5622 scores *= score_mask 

5623 box_mask = array_ops.expand_dims( 

5624 math_ops.cast(score_mask, boxes.dtype), 2) 

5625 boxes *= box_mask 

5626 

5627 if not canonicalized_coordinates: 

5628 with ops.name_scope('canonicalize_coordinates'): 

5629 y_1, x_1, y_2, x_2 = array_ops.split( 

5630 value=boxes, num_or_size_splits=4, axis=2) 

5631 y_1_is_min = math_ops.reduce_all( 

5632 math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0])) 

5633 y_min, y_max = tf_cond.cond( 

5634 y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1)) 

5635 x_1_is_min = math_ops.reduce_all( 

5636 math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0])) 

5637 x_min, x_max = tf_cond.cond( 

5638 x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1)) 

5639 boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2) 

5640 # TODO(@bhack): https://github.com/tensorflow/tensorflow/issues/56089 

5641 # this will be required after deprecation 

5642 #else: 

5643 # y_1, x_1, y_2, x_2 = array_ops.split( 

5644 # value=boxes, num_or_size_splits=4, axis=2) 

5645 

5646 if not sorted_input: 

5647 scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes) 

5648 else: 

5649 # Default value required for Autograph. 

5650 sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32) 

5651 

5652 pad = math_ops.cast( 

5653 math_ops.ceil( 

5654 math_ops.cast( 

5655 math_ops.maximum(num_boxes, max_output_size), dtypes.float32) / 

5656 math_ops.cast(tile_size, dtypes.float32)), 

5657 dtypes.int32) * tile_size - num_boxes 

5658 boxes = array_ops.pad( 

5659 math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]]) 

5660 scores = array_ops.pad( 

5661 math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]]) 

5662 num_boxes_after_padding = num_boxes + pad 

5663 num_iterations = num_boxes_after_padding // tile_size 

5664 def _loop_cond(unused_boxes, unused_threshold, output_size, idx): 

5665 return math_ops.logical_and( 

5666 math_ops.reduce_min(output_size) < max_output_size, 

5667 idx < num_iterations) 

5668 

5669 def suppression_loop_body(boxes, iou_threshold, output_size, idx): 

5670 return _suppression_loop_body( 

5671 boxes, iou_threshold, output_size, idx, tile_size) 

5672 

5673 selected_boxes, _, output_size, _ = while_loop.while_loop( 

5674 _loop_cond, 

5675 suppression_loop_body, 

5676 [ 

5677 boxes, iou_threshold, 

5678 array_ops.zeros([batch_size], dtypes.int32), 

5679 constant_op.constant(0) 

5680 ], 

5681 shape_invariants=[ 

5682 tensor_shape.TensorShape([None, None, 4]), 

5683 tensor_shape.TensorShape([]), 

5684 tensor_shape.TensorShape([None]), 

5685 tensor_shape.TensorShape([]), 

5686 ], 

5687 ) 

5688 num_valid = math_ops.minimum(output_size, max_output_size) 

5689 idx = num_boxes_after_padding - math_ops.cast( 

5690 nn_ops.top_k( 

5691 math_ops.cast(math_ops.reduce_any( 

5692 selected_boxes > 0, [2]), dtypes.int32) * 

5693 array_ops.expand_dims( 

5694 math_ops.range(num_boxes_after_padding, 0, -1), 0), 

5695 max_output_size)[0], dtypes.int32) 

5696 idx = math_ops.minimum(idx, num_boxes - 1) 

5697 

5698 if not sorted_input: 

5699 index_offsets = math_ops.range(batch_size) * num_boxes 

5700 gather_idx = array_ops.reshape( 

5701 idx + array_ops.expand_dims(index_offsets, 1), [-1]) 

5702 idx = array_ops.reshape( 

5703 array_ops.gather(array_ops.reshape(sorted_indices, [-1]), 

5704 gather_idx), 

5705 [batch_size, -1]) 

5706 invalid_index = array_ops.zeros([batch_size, max_output_size], 

5707 dtype=dtypes.int32) 

5708 idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0) 

5709 num_valid_expanded = array_ops.expand_dims(num_valid, 1) 

5710 idx = array_ops.where(idx_index < num_valid_expanded, 

5711 idx, invalid_index) 

5712 

5713 num_valid = array_ops.reshape(num_valid, batch_dims) 

5714 return idx, num_valid 

5715 

5716 

5717def non_max_suppression_padded_v1(boxes, 

5718 scores, 

5719 max_output_size, 

5720 iou_threshold=0.5, 

5721 score_threshold=float('-inf'), 

5722 pad_to_max_output_size=False, 

5723 name=None): 

5724 """Greedily selects a subset of bounding boxes in descending order of score. 

5725 

5726 Performs algorithmically equivalent operation to tf.image.non_max_suppression, 

5727 with the addition of an optional parameter which zero-pads the output to 

5728 be of size `max_output_size`. 

5729 The output of this operation is a tuple containing the set of integers 

5730 indexing into the input collection of bounding boxes representing the selected 

5731 boxes and the number of valid indices in the index set. The bounding box 

5732 coordinates corresponding to the selected indices can then be obtained using 

5733 the `tf.slice` and `tf.gather` operations. For example: 

5734 ```python 

5735 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded( 

5736 boxes, scores, max_output_size, iou_threshold, 

5737 score_threshold, pad_to_max_output_size=True) 

5738 selected_indices = tf.slice( 

5739 selected_indices_padded, tf.constant([0]), num_valid) 

5740 selected_boxes = tf.gather(boxes, selected_indices) 

5741 ``` 

5742 

5743 Args: 

5744 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 

5745 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 

5746 score corresponding to each box (each row of boxes). 

5747 max_output_size: A scalar integer `Tensor` representing the maximum number 

5748 of boxes to be selected by non-max suppression. 

5749 iou_threshold: A float representing the threshold for deciding whether boxes 

5750 overlap too much with respect to IOU. 

5751 score_threshold: A float representing the threshold for deciding when to 

5752 remove boxes based on score. 

5753 pad_to_max_output_size: bool. If True, size of `selected_indices` output is 

5754 padded to `max_output_size`. 

5755 name: A name for the operation (optional). 

5756 

5757 Returns: 

5758 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 

5759 selected indices from the boxes tensor, where `M <= max_output_size`. 

5760 valid_outputs: A scalar integer `Tensor` denoting how many elements in 

5761 `selected_indices` are valid. Valid elements occur first, then padding. 

5762 """ 

5763 with ops.name_scope(name, 'non_max_suppression_padded'): 

5764 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 

5765 score_threshold = ops.convert_to_tensor( 

5766 score_threshold, name='score_threshold') 

5767 return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size, 

5768 iou_threshold, score_threshold, 

5769 pad_to_max_output_size) 

5770 

5771 

5772@tf_export('image.draw_bounding_boxes', v1=[]) 

5773@dispatch.add_dispatch_support 

5774def draw_bounding_boxes_v2(images, boxes, colors, name=None): 

5775 """Draw bounding boxes on a batch of images. 

5776 

5777 Outputs a copy of `images` but draws on top of the pixels zero or more 

5778 bounding boxes specified by the locations in `boxes`. The coordinates of the 

5779 each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 

5780 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 

5781 and the height of the underlying image. 

5782 

5783 For example, if an image is 100 x 200 pixels (height x width) and the bounding 

5784 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 

5785 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). 

5786 

5787 Parts of the bounding box may fall outside the image. 

5788 

5789 Args: 

5790 images: A `Tensor`. Must be one of the following types: `float32`, `half`. 

5791 4-D with shape `[batch, height, width, depth]`. A batch of images. 

5792 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, 

5793 num_bounding_boxes, 4]` containing bounding boxes. 

5794 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle 

5795 through for the boxes. 

5796 name: A name for the operation (optional). 

5797 

5798 Returns: 

5799 A `Tensor`. Has the same type as `images`. 

5800 

5801 Usage Example: 

5802 

5803 >>> # create an empty image 

5804 >>> img = tf.zeros([1, 3, 3, 3]) 

5805 >>> # draw a box around the image 

5806 >>> box = np.array([0, 0, 1, 1]) 

5807 >>> boxes = box.reshape([1, 1, 4]) 

5808 >>> # alternate between red and blue 

5809 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 

5810 >>> tf.image.draw_bounding_boxes(img, boxes, colors) 

5811 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy= 

5812 array([[[[1., 0., 0.], 

5813 [1., 0., 0.], 

5814 [1., 0., 0.]], 

5815 [[1., 0., 0.], 

5816 [0., 0., 0.], 

5817 [1., 0., 0.]], 

5818 [[1., 0., 0.], 

5819 [1., 0., 0.], 

5820 [1., 0., 0.]]]], dtype=float32)> 

5821 """ 

5822 if colors is None: 

5823 return gen_image_ops.draw_bounding_boxes(images, boxes, name) 

5824 return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name) 

5825 

5826 

5827@tf_export(v1=['image.draw_bounding_boxes']) 

5828@dispatch.add_dispatch_support 

5829def draw_bounding_boxes(images, boxes, name=None, colors=None): 

5830 """Draw bounding boxes on a batch of images. 

5831 

5832 Outputs a copy of `images` but draws on top of the pixels zero or more 

5833 bounding boxes specified by the locations in `boxes`. The coordinates of the 

5834 each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 

5835 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 

5836 and the height of the underlying image. 

5837 

5838 For example, if an image is 100 x 200 pixels (height x width) and the bounding 

5839 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 

5840 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). 

5841 

5842 Parts of the bounding box may fall outside the image. 

5843 

5844 Args: 

5845 images: A `Tensor`. Must be one of the following types: `float32`, `half`. 

5846 4-D with shape `[batch, height, width, depth]`. A batch of images. 

5847 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, 

5848 num_bounding_boxes, 4]` containing bounding boxes. 

5849 name: A name for the operation (optional). 

5850 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle 

5851 through for the boxes. 

5852 

5853 Returns: 

5854 A `Tensor`. Has the same type as `images`. 

5855 

5856 Usage Example: 

5857 

5858 >>> # create an empty image 

5859 >>> img = tf.zeros([1, 3, 3, 3]) 

5860 >>> # draw a box around the image 

5861 >>> box = np.array([0, 0, 1, 1]) 

5862 >>> boxes = box.reshape([1, 1, 4]) 

5863 >>> # alternate between red and blue 

5864 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 

5865 >>> tf.image.draw_bounding_boxes(img, boxes, colors) 

5866 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy= 

5867 array([[[[1., 0., 0.], 

5868 [1., 0., 0.], 

5869 [1., 0., 0.]], 

5870 [[1., 0., 0.], 

5871 [0., 0., 0.], 

5872 [1., 0., 0.]], 

5873 [[1., 0., 0.], 

5874 [1., 0., 0.], 

5875 [1., 0., 0.]]]], dtype=float32)> 

5876 """ 

5877 return draw_bounding_boxes_v2(images, boxes, colors, name) 

5878 

5879 

5880@tf_export('image.generate_bounding_box_proposals') 

5881@dispatch.add_dispatch_support 

5882def generate_bounding_box_proposals(scores, 

5883 bbox_deltas, 

5884 image_info, 

5885 anchors, 

5886 nms_threshold=0.7, 

5887 pre_nms_topn=6000, 

5888 min_size=16, 

5889 post_nms_topn=300, 

5890 name=None): 

5891 """Generate bounding box proposals from encoded bounding boxes. 

5892 

5893 Args: 

5894 scores: A 4-D float `Tensor` of shape 

5895 `[num_images, height, width, num_achors]` containing scores of 

5896 the boxes for given anchors, can be unsorted. 

5897 bbox_deltas: A 4-D float `Tensor` of shape 

5898 `[num_images, height, width, 4 x num_anchors]` encoding boxes 

5899 with respect to each anchor. Coordinates are given 

5900 in the form `[dy, dx, dh, dw]`. 

5901 image_info: A 2-D float `Tensor` of shape `[num_images, 5]` 

5902 containing image information Height, Width, Scale. 

5903 anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]` 

5904 describing the anchor boxes. 

5905 Boxes are formatted in the form `[y1, x1, y2, x2]`. 

5906 nms_threshold: A scalar float `Tensor` for non-maximal-suppression 

5907 threshold. Defaults to 0.7. 

5908 pre_nms_topn: A scalar int `Tensor` for the number of 

5909 top scoring boxes to be used as input. Defaults to 6000. 

5910 min_size: A scalar float `Tensor`. Any box that has a smaller size 

5911 than min_size will be discarded. Defaults to 16. 

5912 post_nms_topn: An integer. Maximum number of rois in the output. 

5913 name: A name for this operation (optional). 

5914 

5915 Returns: 

5916 rois: Region of interest boxes sorted by their scores. 

5917 roi_probabilities: scores of the ROI boxes in the ROIs' `Tensor`. 

5918 """ 

5919 return gen_image_ops.generate_bounding_box_proposals( 

5920 scores=scores, 

5921 bbox_deltas=bbox_deltas, 

5922 image_info=image_info, 

5923 anchors=anchors, 

5924 nms_threshold=nms_threshold, 

5925 pre_nms_topn=pre_nms_topn, 

5926 min_size=min_size, 

5927 post_nms_topn=post_nms_topn, 

5928 name=name)