# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keras image preprocessing layers."""

import numpy as np
import tensorflow.compat.v2 as tf
from tensorflow.python.util.tf_export import keras_export

from keras.src import backend
from keras.src.engine import base_layer
from keras.src.engine import base_preprocessing_layer
from keras.src.layers.preprocessing import preprocessing_utils as utils
from keras.src.utils import image_utils
from keras.src.utils import tf_utils

H_AXIS = -3
W_AXIS = -2


def check_fill_mode_and_interpolation(fill_mode, interpolation):
    if fill_mode not in {"reflect", "wrap", "constant", "nearest"}:
        raise NotImplementedError(
            f"Unknown `fill_mode` {fill_mode}. Only `reflect`, `wrap`, "
            "`constant` and `nearest` are supported."
        )
    if interpolation not in {"nearest", "bilinear"}:
        raise NotImplementedError(
            f"Unknown `interpolation` {interpolation}. Only `nearest` and "
            "`bilinear` are supported."
        )
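

# A quick sketch of the validator's behavior (illustrative values only):
#
#     check_fill_mode_and_interpolation("reflect", "bilinear")  # returns None
#     check_fill_mode_and_interpolation("mirror", "bilinear")
#     # -> NotImplementedError: Unknown `fill_mode` mirror. ...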


@keras_export(
    "keras.layers.Resizing", "keras.layers.experimental.preprocessing.Resizing"
)
class Resizing(base_layer.Layer):
    """A preprocessing layer which resizes images.

    This layer resizes an image input to a target height and width. The input
    should be a 4D (batched) or 3D (unbatched) tensor in `"channels_last"`
    format. Input pixel values can be of any range
    (e.g. `[0., 1.)` or `[0, 255]`) and of integer or floating point dtype.
    By default, the layer will output floats.

    This layer can be called on tf.RaggedTensor batches of input images of
    distinct sizes, and will resize the outputs to dense tensors of uniform
    size.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Args:
        height: Integer, the height of the output shape.
        width: Integer, the width of the output shape.
        interpolation: String, the interpolation method.
            Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`,
            `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`.
            Defaults to `"bilinear"`.
        crop_to_aspect_ratio: If True, resize the images without aspect
            ratio distortion. When the original aspect ratio differs
            from the target aspect ratio, the output image will be
            cropped so as to return the
            largest possible window in the image (of size `(height, width)`)
            that matches the target aspect ratio. By default
            (`crop_to_aspect_ratio=False`), aspect ratio may not be preserved.
    """

    def __init__(
        self,
        height,
        width,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
        **kwargs,
    ):
        self.height = height
        self.width = width
        self.interpolation = interpolation
        self.crop_to_aspect_ratio = crop_to_aspect_ratio
        self._interpolation_method = image_utils.get_interpolation(
            interpolation
        )
        super().__init__(**kwargs)
        base_preprocessing_layer.keras_kpl_gauge.get_cell("Resizing").set(True)

    def call(self, inputs):
        # tf.image.resize will always output float32
        # and operate more efficiently on float32
        # unless interpolation is nearest, in which case output type matches
        # input type.
        if self.interpolation == "nearest":
            input_dtype = self.compute_dtype
        else:
            input_dtype = tf.float32
        inputs = convert_inputs(inputs, dtype=input_dtype)
        size = [self.height, self.width]
        if self.crop_to_aspect_ratio:

            def resize_to_aspect(x):
                if tf_utils.is_ragged(inputs):
                    x = x.to_tensor()
                return image_utils.smart_resize(
                    x, size=size, interpolation=self._interpolation_method
                )

            if tf_utils.is_ragged(inputs):
                size_as_shape = tf.TensorShape(size)
                shape = size_as_shape + inputs.shape[-1:]
                spec = tf.TensorSpec(shape, input_dtype)
                outputs = tf.map_fn(
                    resize_to_aspect, inputs, fn_output_signature=spec
                )
            else:
                outputs = resize_to_aspect(inputs)
        else:
            outputs = tf.image.resize(
                inputs, size=size, method=self._interpolation_method
            )
        return tf.cast(outputs, self.compute_dtype)

    def compute_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape).as_list()
        input_shape[H_AXIS] = self.height
        input_shape[W_AXIS] = self.width
        return tf.TensorShape(input_shape)

    def get_config(self):
        config = {
            "height": self.height,
            "width": self.width,
            "interpolation": self.interpolation,
            "crop_to_aspect_ratio": self.crop_to_aspect_ratio,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
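

# A minimal usage sketch for `Resizing` (shapes are illustrative):
#
#     layer = Resizing(height=128, width=128, crop_to_aspect_ratio=True)
#     images = tf.random.uniform((8, 300, 200, 3))  # batched, channels_last
#     out = layer(images)  # float32, shape (8, 128, 128, 3)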


@keras_export(
    "keras.layers.CenterCrop",
    "keras.layers.experimental.preprocessing.CenterCrop",
)
class CenterCrop(base_layer.Layer):
    """A preprocessing layer which crops images.

    This layer crops the central portion of the images to a target size. If
    an image is smaller than the target size, it will be resized and cropped
    so as to return the largest possible window in the image that matches
    the target aspect ratio.

    Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
    and of integer or floating point dtype.
    By default, the layer will output floats.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., target_height, target_width, channels)`.

    If the input height/width is even and the target height/width is odd (or
    inversely), the input image is left-padded by 1 pixel.

    Args:
        height: Integer, the height of the output shape.
        width: Integer, the width of the output shape.
    """

    def __init__(self, height, width, **kwargs):
        self.height = height
        self.width = width
        super().__init__(**kwargs, autocast=False)
        base_preprocessing_layer.keras_kpl_gauge.get_cell("CenterCrop").set(
            True
        )

    def call(self, inputs):
        inputs = convert_inputs(inputs, self.compute_dtype)
        input_shape = tf.shape(inputs)
        h_diff = input_shape[H_AXIS] - self.height
        w_diff = input_shape[W_AXIS] - self.width

        def center_crop():
            h_start = tf.cast(h_diff / 2, tf.int32)
            w_start = tf.cast(w_diff / 2, tf.int32)
            return tf.image.crop_to_bounding_box(
                inputs, h_start, w_start, self.height, self.width
            )

        def upsize():
            outputs = image_utils.smart_resize(
                inputs, [self.height, self.width]
            )
            # smart_resize will always output float32, so we need to re-cast.
            return tf.cast(outputs, self.compute_dtype)

        return tf.cond(
            tf.reduce_all((h_diff >= 0, w_diff >= 0)), center_crop, upsize
        )

    def compute_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape).as_list()
        input_shape[H_AXIS] = self.height
        input_shape[W_AXIS] = self.width
        return tf.TensorShape(input_shape)

    def get_config(self):
        config = {
            "height": self.height,
            "width": self.width,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
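

# A usage sketch for `CenterCrop`, showing both branches of the `tf.cond`
# above (values are illustrative):
#
#     layer = CenterCrop(height=64, width=64)
#     big = tf.random.uniform((4, 100, 100, 3))
#     layer(big).shape    # (4, 64, 64, 3): central 64x64 window is cropped
#     small = tf.random.uniform((4, 32, 32, 3))
#     layer(small).shape  # (4, 64, 64, 3): upsized via smart_resize instead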


@keras_export(
    "keras.layers.RandomCrop",
    "keras.layers.experimental.preprocessing.RandomCrop",
    v1=[],
)
class RandomCrop(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly crops images during training.

    During training, this layer will randomly choose a location to crop
    images down to a target size. The layer will crop all the images in the
    same batch to the same cropping location.

    At inference time, and during training if an input image is smaller than
    the target size, the input will be resized and cropped so as to return
    the largest possible window in the image that matches the target aspect
    ratio. If you need to apply random cropping at inference time, set
    `training` to True when calling the layer.

    Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
    and of integer or floating point dtype. By default, the layer will
    output floats.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., target_height, target_width, channels)`.

    Args:
        height: Integer, the height of the output shape.
        width: Integer, the width of the output shape.
        seed: Integer. Used to create a random seed.
    """

    def __init__(self, height, width, seed=None, **kwargs):
        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomCrop").set(
            True
        )
        super().__init__(
            **kwargs, autocast=False, seed=seed, force_generator=True
        )
        self.height = height
        self.width = width
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs, dtype=self.compute_dtype)
        input_shape = tf.shape(inputs)
        h_diff = input_shape[H_AXIS] - self.height
        w_diff = input_shape[W_AXIS] - self.width

        def random_crop():
            dtype = input_shape.dtype
            rands = self._random_generator.random_uniform(
                [2], 0, dtype.max, dtype
            )
            h_start = rands[0] % (h_diff + 1)
            w_start = rands[1] % (w_diff + 1)
            return tf.image.crop_to_bounding_box(
                inputs, h_start, w_start, self.height, self.width
            )

        def resize():
            outputs = image_utils.smart_resize(
                inputs, [self.height, self.width]
            )
            # smart_resize will always output float32, so we need to re-cast.
            return tf.cast(outputs, self.compute_dtype)

        return tf.cond(
            tf.reduce_all((training, h_diff >= 0, w_diff >= 0)),
            random_crop,
            resize,
        )

    def compute_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape).as_list()
        input_shape[H_AXIS] = self.height
        input_shape[W_AXIS] = self.width
        return tf.TensorShape(input_shape)

    def get_config(self):
        config = {
            "height": self.height,
            "width": self.width,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
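

# A usage sketch for `RandomCrop` (illustrative; note the `training` flag):
#
#     layer = RandomCrop(height=64, width=64, seed=42)
#     images = tf.random.uniform((4, 100, 100, 3))
#     layer(images).shape                  # (4, 64, 64, 3), random window
#     layer(images, training=False).shape  # (4, 64, 64, 3), resize-and-crop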


@keras_export(
    "keras.layers.Rescaling",
    "keras.layers.experimental.preprocessing.Rescaling",
)
class Rescaling(base_layer.Layer):
    """A preprocessing layer which rescales input values to a new range.

    This layer rescales every value of an input (often an image) by
    multiplying by `scale` and adding `offset`.

    For instance:

    1. To rescale an input in the `[0, 255]` range
    to be in the `[0, 1]` range, you would pass `scale=1./255`.

    2. To rescale an input in the `[0, 255]` range to be in the `[-1, 1]`
    range, you would pass `scale=1./127.5, offset=-1`.

    The rescaling is applied both during training and inference. Inputs can
    be of integer or floating point dtype, and by default the layer will
    output floats.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Input shape:
        Arbitrary.

    Output shape:
        Same as input.

    Args:
        scale: Float, the scale to apply to the inputs.
        offset: Float, the offset to apply to the inputs.
    """

    def __init__(self, scale, offset=0.0, **kwargs):
        self.scale = scale
        self.offset = offset
        super().__init__(**kwargs)
        base_preprocessing_layer.keras_kpl_gauge.get_cell("Rescaling").set(True)

    def call(self, inputs):
        dtype = self.compute_dtype
        inputs = convert_inputs(inputs, dtype=dtype)
        scale = tf.cast(self.scale, dtype)
        offset = tf.cast(self.offset, dtype)
        return tf.cast(inputs, dtype) * scale + offset

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = {
            "scale": self.scale,
            "offset": self.offset,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
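

# A usage sketch for `Rescaling`, mapping `[0, 255]` inputs to `[-1, 1]`
# (illustrative values):
#
#     layer = Rescaling(scale=1.0 / 127.5, offset=-1.0)
#     x = tf.constant([[0.0, 127.5, 255.0]])
#     layer(x)  # [[-1.0, 0.0, 1.0]]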


HORIZONTAL = "horizontal"
VERTICAL = "vertical"
HORIZONTAL_AND_VERTICAL = "horizontal_and_vertical"


@keras_export(
    "keras.layers.RandomFlip",
    "keras.layers.experimental.preprocessing.RandomFlip",
    v1=[],
)
class RandomFlip(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly flips images during training.

    This layer will flip the images horizontally and/or vertically based on
    the `mode` attribute. During inference time, the output will be
    identical to the input. Call the layer with `training=True` to flip the
    input.

    Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
    and of integer or floating point dtype.
    By default, the layer will output floats.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Args:
        mode: String indicating which flip mode to use. Can be `"horizontal"`,
            `"vertical"`, or `"horizontal_and_vertical"`. `"horizontal"` is a
            left-right flip and `"vertical"` is a top-bottom flip. Defaults to
            `"horizontal_and_vertical"`.
        seed: Integer. Used to create a random seed.
    """

    def __init__(self, mode=HORIZONTAL_AND_VERTICAL, seed=None, **kwargs):
        super().__init__(seed=seed, force_generator=True, **kwargs)
        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomFlip").set(
            True
        )
        self.mode = mode
        if mode == HORIZONTAL:
            self.horizontal = True
            self.vertical = False
        elif mode == VERTICAL:
            self.horizontal = False
            self.vertical = True
        elif mode == HORIZONTAL_AND_VERTICAL:
            self.horizontal = True
            self.vertical = True
        else:
            raise ValueError(
                f"RandomFlip layer {self.name} received an unknown mode "
                f"argument {mode}"
            )
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs, self.compute_dtype)

        def random_flipped_inputs(inputs):
            flipped_outputs = inputs
            if self.horizontal:
                seed = self._random_generator.make_seed_for_stateless_op()
                if seed is not None:
                    flipped_outputs = tf.image.stateless_random_flip_left_right(
                        flipped_outputs, seed=seed
                    )
                else:
                    flipped_outputs = tf.image.random_flip_left_right(
                        flipped_outputs,
                        self._random_generator.make_legacy_seed(),
                    )
            if self.vertical:
                seed = self._random_generator.make_seed_for_stateless_op()
                if seed is not None:
                    flipped_outputs = tf.image.stateless_random_flip_up_down(
                        flipped_outputs, seed=seed
                    )
                else:
                    flipped_outputs = tf.image.random_flip_up_down(
                        flipped_outputs,
                        self._random_generator.make_legacy_seed(),
                    )
            flipped_outputs.set_shape(inputs.shape)
            return flipped_outputs

        if training:
            return random_flipped_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = {
            "mode": self.mode,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
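

# A usage sketch for `RandomFlip` (illustrative; flips only happen when
# `training=True`):
#
#     layer = RandomFlip(mode="horizontal", seed=42)
#     images = tf.random.uniform((4, 32, 32, 3))
#     out = layer(images)                   # each image flipped or not
#     same = layer(images, training=False)  # identical to `images`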


# TODO(tanzheny): Add examples, here and everywhere.
@keras_export(
    "keras.layers.RandomTranslation",
    "keras.layers.experimental.preprocessing.RandomTranslation",
    v1=[],
)
class RandomTranslation(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly translates images during training.

    This layer will apply random translations to each image during training,
    filling empty space according to `fill_mode`.

    Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
    and of integer or floating point dtype. By default, the layer will
    output floats.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Args:
        height_factor: a float represented as fraction of value, or a tuple
            of size 2 representing lower and upper bound for shifting
            vertically. A negative value means shifting image up, while a
            positive value means shifting image down. When represented as a
            single positive float, this value is used for both the upper and
            lower bound. For instance, `height_factor=(-0.2, 0.3)` results
            in an output shifted vertically by a random amount in the range
            `[-20%, +30%]`. `height_factor=0.2` results in an output shifted
            vertically by a random amount in the range `[-20%, +20%]`.
        width_factor: a float represented as fraction of value, or a tuple
            of size 2 representing lower and upper bound for shifting
            horizontally. A negative value means shifting image left, while
            a positive value means shifting image right. When represented as
            a single positive float, this value is used for both the upper
            and lower bound. For instance, `width_factor=(-0.2, 0.3)`
            results in an output shifted horizontally by a random amount in
            the range `[-20%, +30%]`. `width_factor=0.2` results in an
            output shifted left or right by a random amount in the range
            `[-20%, +20%]`.
        fill_mode: Points outside the boundaries of the input are filled
            according to the given mode
            (one of `{"constant", "reflect", "wrap", "nearest"}`).
            - *reflect*: `(d c b a | a b c d | d c b a)` The input is
              extended by reflecting about the edge of the last pixel.
            - *constant*: `(k k k k | a b c d | k k k k)` The input is
              extended by filling all values beyond the edge with the same
              constant value k = 0.
            - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended
              by wrapping around to the opposite edge.
            - *nearest*: `(a a a a | a b c d | d d d d)` The input is
              extended by the nearest pixel.
        interpolation: Interpolation mode. Supported values: `"nearest"`,
            `"bilinear"`.
        seed: Integer. Used to create a random seed.
        fill_value: a float represents the value to be filled outside the
            boundaries when `fill_mode="constant"`.

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.
    """

    def __init__(
        self,
        height_factor,
        width_factor,
        fill_mode="reflect",
        interpolation="bilinear",
        seed=None,
        fill_value=0.0,
        **kwargs,
    ):
        base_preprocessing_layer.keras_kpl_gauge.get_cell(
            "RandomTranslation"
        ).set(True)
        super().__init__(seed=seed, force_generator=True, **kwargs)
        self.height_factor = height_factor
        if isinstance(height_factor, (tuple, list)):
            self.height_lower = height_factor[0]
            self.height_upper = height_factor[1]
        else:
            self.height_lower = -height_factor
            self.height_upper = height_factor
        if self.height_upper < self.height_lower:
            raise ValueError(
                "`height_factor` cannot have upper bound less than "
                f"lower bound, got {height_factor}"
            )
        if abs(self.height_lower) > 1.0 or abs(self.height_upper) > 1.0:
            raise ValueError(
                "`height_factor` argument must have values between [-1, 1]. "
                f"Received: height_factor={height_factor}"
            )

        self.width_factor = width_factor
        if isinstance(width_factor, (tuple, list)):
            self.width_lower = width_factor[0]
            self.width_upper = width_factor[1]
        else:
            self.width_lower = -width_factor
            self.width_upper = width_factor
        if self.width_upper < self.width_lower:
            raise ValueError(
                "`width_factor` cannot have upper bound less than "
                f"lower bound, got {width_factor}"
            )
        if abs(self.width_lower) > 1.0 or abs(self.width_upper) > 1.0:
            raise ValueError(
                "`width_factor` must have values between [-1, 1], "
                f"got {width_factor}"
            )

        check_fill_mode_and_interpolation(fill_mode, interpolation)

        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.interpolation = interpolation
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs, self.compute_dtype)

        def random_translated_inputs(inputs):
            """Translated inputs with random ops."""
            # The transform op only accepts rank 4 inputs,
            # so if we have an unbatched image,
            # we need to temporarily expand dims to a batch.
            original_shape = inputs.shape
            unbatched = inputs.shape.rank == 3
            if unbatched:
                inputs = tf.expand_dims(inputs, 0)

            inputs_shape = tf.shape(inputs)
            batch_size = inputs_shape[0]
            img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32)
            img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32)
            height_translate = self._random_generator.random_uniform(
                shape=[batch_size, 1],
                minval=self.height_lower,
                maxval=self.height_upper,
                dtype=tf.float32,
            )
            height_translate = height_translate * img_hd
            width_translate = self._random_generator.random_uniform(
                shape=[batch_size, 1],
                minval=self.width_lower,
                maxval=self.width_upper,
                dtype=tf.float32,
            )
            width_translate = width_translate * img_wd
            translations = tf.cast(
                tf.concat([width_translate, height_translate], axis=1),
                dtype=tf.float32,
            )
            output = transform(
                inputs,
                get_translation_matrix(translations),
                interpolation=self.interpolation,
                fill_mode=self.fill_mode,
                fill_value=self.fill_value,
            )
            if unbatched:
                output = tf.squeeze(output, 0)
            output.set_shape(original_shape)
            return output

        if training:
            return random_translated_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = {
            "height_factor": self.height_factor,
            "width_factor": self.width_factor,
            "fill_mode": self.fill_mode,
            "fill_value": self.fill_value,
            "interpolation": self.interpolation,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
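

# A usage sketch for `RandomTranslation` (illustrative values): each image
# is shifted by up to 10% of its height and width, and the vacated strip is
# filled according to `fill_mode`.
#
#     layer = RandomTranslation(
#         height_factor=0.1, width_factor=0.1, fill_mode="constant"
#     )
#     images = tf.random.uniform((4, 64, 64, 3))
#     out = layer(images)  # shape (4, 64, 64, 3)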


def get_translation_matrix(translations, name=None):
    """Returns projective transform(s) for the given translation(s).

    Args:
        translations: A matrix of 2-element lists representing `[dx, dy]`
            to translate for each image (for a batch of images).
        name: The name of the op.

    Returns:
        A tensor of shape `(num_images, 8)` projective transforms
        which can be given to `transform`.
    """
    with backend.name_scope(name or "translation_matrix"):
        num_translations = tf.shape(translations)[0]
        # The translation matrix looks like:
        #     [[1 0 -dx]
        #      [0 1 -dy]
        #      [0 0 1]]
        # where the last entry is implicit.
        # Translation matrices are always float32.
        return tf.concat(
            values=[
                tf.ones((num_translations, 1), tf.float32),
                tf.zeros((num_translations, 1), tf.float32),
                -translations[:, 0, None],
                tf.zeros((num_translations, 1), tf.float32),
                tf.ones((num_translations, 1), tf.float32),
                -translations[:, 1, None],
                tf.zeros((num_translations, 2), tf.float32),
            ],
            axis=1,
        )
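

# A worked example for `get_translation_matrix` (illustrative values): for a
# single translation `[dx, dy] = [2.0, 3.0]` the flattened transform is
# `[1, 0, -dx, 0, 1, -dy, 0, 0]`, so output pixel `(x, y)` samples input
# pixel `(x - 2, y - 3)`, shifting the image content right and down.
#
#     get_translation_matrix(tf.constant([[2.0, 3.0]]))
#     # -> [[1., 0., -2., 0., 1., -3., 0., 0.]]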


def transform(
    images,
    transforms,
    fill_mode="reflect",
    fill_value=0.0,
    interpolation="bilinear",
    output_shape=None,
    name=None,
):
    """Applies the given transform(s) to the image(s).

    Args:
        images: A tensor of shape
            `(num_images, num_rows, num_columns, num_channels)` (NHWC).
            The rank must be statically known
            (the shape is not `TensorShape(None)`).
        transforms: Projective transform matrix/matrices.
            A vector of length 8 or tensor of size N x 8.
            If one row of transforms is [a0, a1, a2, b0, b1, b2, c0, c1],
            then it maps the *output* point `(x, y)`
            to a transformed *input* point
            `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
            where `k = c0 x + c1 y + 1`. The transforms are *inverted*
            compared to the transform mapping input points to output points.
            Note that gradients are not backpropagated
            into transformation parameters.
        fill_mode: Points outside the boundaries of the input are filled
            according to the given mode
            (one of `{"constant", "reflect", "wrap", "nearest"}`).
        fill_value: a float represents the value to be filled outside
            the boundaries when `fill_mode="constant"`.
        interpolation: Interpolation mode. Supported values: `"nearest"`,
            `"bilinear"`.
        output_shape: Output dimension after the transform, `[height, width]`.
            If `None`, output is the same size as input image.
        name: The name of the op.

    Fill mode behavior for each valid value is as follows:

    - `"reflect"`: `(d c b a | a b c d | d c b a)`
      The input is extended by reflecting about the edge of the last pixel.

    - `"constant"`: `(k k k k | a b c d | k k k k)`
      The input is extended by filling all
      values beyond the edge with the same constant value k = 0.

    - `"wrap"`: `(a b c d | a b c d | a b c d)`
      The input is extended by wrapping around to the opposite edge.

    - `"nearest"`: `(a a a a | a b c d | d d d d)`
      The input is extended by the nearest pixel.

    Input shape:
        4D tensor with shape: `(samples, height, width, channels)`,
        in `"channels_last"` format.

    Output shape:
        4D tensor with shape: `(samples, height, width, channels)`,
        in `"channels_last"` format.

    Returns:
        Image(s) with the same type and shape as `images`, with the given
        transform(s) applied. Transformed coordinates outside of the input
        image will be filled with zeros.
    """
    with backend.name_scope(name or "transform"):
        if output_shape is None:
            output_shape = tf.shape(images)[1:3]
            if not tf.executing_eagerly():
                output_shape_value = tf.get_static_value(output_shape)
                if output_shape_value is not None:
                    output_shape = output_shape_value

        output_shape = tf.convert_to_tensor(
            output_shape, tf.int32, name="output_shape"
        )

        if not output_shape.get_shape().is_compatible_with([2]):
            raise ValueError(
                "output_shape must be a 1-D Tensor of 2 elements: "
                "new_height, new_width, instead got "
                f"output_shape={output_shape}"
            )

        fill_value = tf.convert_to_tensor(
            fill_value, tf.float32, name="fill_value"
        )

        return tf.raw_ops.ImageProjectiveTransformV3(
            images=images,
            output_shape=output_shape,
            fill_value=fill_value,
            transforms=transforms,
            fill_mode=fill_mode.upper(),
            interpolation=interpolation.upper(),
        )
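

# A minimal sketch for `transform` (illustrative): the identity transform
# `[1, 0, 0, 0, 1, 0, 0, 0]` returns the input unchanged.
#
#     images = tf.random.uniform((1, 4, 4, 3))
#     identity = tf.constant([[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]])
#     out = transform(images, identity)  # == images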


def get_rotation_matrix(angles, image_height, image_width, name=None):
    """Returns projective transform(s) for the given angle(s).

    Args:
        angles: A scalar angle to rotate all images by,
            or (for batches of images) a vector with an angle to
            rotate each image in the batch. The rank must be
            statically known (the shape is not `TensorShape(None)`).
        image_height: Height of the image(s) to be transformed.
        image_width: Width of the image(s) to be transformed.
        name: The name of the op.

    Returns:
        A tensor of shape (num_images, 8).
        Projective transforms which can be given
        to operation `image_projective_transform_v2`.
        If one row of transforms is
        [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
        `(x, y)` to a transformed *input* point
        `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
        where `k = c0 x + c1 y + 1`.
    """
    with backend.name_scope(name or "rotation_matrix"):
        x_offset = (
            (image_width - 1)
            - (
                tf.cos(angles) * (image_width - 1)
                - tf.sin(angles) * (image_height - 1)
            )
        ) / 2.0
        y_offset = (
            (image_height - 1)
            - (
                tf.sin(angles) * (image_width - 1)
                + tf.cos(angles) * (image_height - 1)
            )
        ) / 2.0
        num_angles = tf.shape(angles)[0]
        return tf.concat(
            values=[
                tf.cos(angles)[:, None],
                -tf.sin(angles)[:, None],
                x_offset[:, None],
                tf.sin(angles)[:, None],
                tf.cos(angles)[:, None],
                y_offset[:, None],
                tf.zeros((num_angles, 2), tf.float32),
            ],
            axis=1,
        )
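

# A worked example for `get_rotation_matrix` (illustrative): a zero angle
# gives cos = 1 and sin = 0, both offsets vanish, and the result is the
# identity transform.
#
#     get_rotation_matrix(tf.constant([0.0]), 4.0, 4.0)
#     # -> [[1., 0., 0., 0., 1., 0., 0., 0.]]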


@keras_export(
    "keras.layers.RandomRotation",
    "keras.layers.experimental.preprocessing.RandomRotation",
    v1=[],
)
class RandomRotation(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly rotates images during training.

    This layer will apply random rotations to each image, filling empty
    space according to `fill_mode`.

    By default, random rotations are only applied during training.
    At inference time, the layer does nothing. If you need to apply random
    rotations at inference time, set `training` to True when calling the
    layer.

    Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
    and of integer or floating point dtype.
    By default, the layer will output floats.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Args:
        factor: a float represented as fraction of 2 Pi, or a tuple of size
            2 representing lower and upper bound for rotating clockwise and
            counter-clockwise. A positive value means rotating
            counter-clockwise, while a negative value means clockwise. When
            represented as a single float, this value is used for both the
            upper and lower bound. For instance, `factor=(-0.2, 0.3)`
            results in an output rotation by a random amount in the range
            `[-20% * 2pi, 30% * 2pi]`. `factor=0.2` results in an output
            rotating by a random amount in the range
            `[-20% * 2pi, 20% * 2pi]`.
        fill_mode: Points outside the boundaries of the input are filled
            according to the given mode
            (one of `{"constant", "reflect", "wrap", "nearest"}`).
            - *reflect*: `(d c b a | a b c d | d c b a)` The input is
              extended by reflecting about the edge of the last pixel.
            - *constant*: `(k k k k | a b c d | k k k k)` The input is
              extended by filling all values beyond the edge with the same
              constant value k = 0.
            - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended
              by wrapping around to the opposite edge.
            - *nearest*: `(a a a a | a b c d | d d d d)` The input is
              extended by the nearest pixel.
        interpolation: Interpolation mode. Supported values: `"nearest"`,
            `"bilinear"`.
        seed: Integer. Used to create a random seed.
        fill_value: a float represents the value to be filled outside
            the boundaries when `fill_mode="constant"`.
    """

    def __init__(
        self,
        factor,
        fill_mode="reflect",
        interpolation="bilinear",
        seed=None,
        fill_value=0.0,
        **kwargs,
    ):
        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomRotation").set(
            True
        )
        super().__init__(seed=seed, force_generator=True, **kwargs)
        self.factor = factor
        if isinstance(factor, (tuple, list)):
            self.lower = factor[0]
            self.upper = factor[1]
        else:
            self.lower = -factor
            self.upper = factor
        if self.upper < self.lower:
            raise ValueError(
                "`factor` cannot have an upper bound less than the "
                f"lower bound. Received: factor={factor}"
            )
        check_fill_mode_and_interpolation(fill_mode, interpolation)
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.interpolation = interpolation
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs, self.compute_dtype)

        def random_rotated_inputs(inputs):
            """Rotated inputs with random ops."""
            original_shape = inputs.shape
            unbatched = inputs.shape.rank == 3
            # The transform op only accepts rank 4 inputs,
            # so if we have an unbatched image,
            # we need to temporarily expand dims to a batch.
            if unbatched:
                inputs = tf.expand_dims(inputs, 0)
            inputs_shape = tf.shape(inputs)
            batch_size = inputs_shape[0]
            img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32)
            img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32)
            min_angle = self.lower * 2.0 * np.pi
            max_angle = self.upper * 2.0 * np.pi
            angles = self._random_generator.random_uniform(
                shape=[batch_size], minval=min_angle, maxval=max_angle
            )
            output = transform(
                inputs,
                get_rotation_matrix(angles, img_hd, img_wd),
                fill_mode=self.fill_mode,
                fill_value=self.fill_value,
                interpolation=self.interpolation,
            )
            if unbatched:
                output = tf.squeeze(output, 0)
            output.set_shape(original_shape)
            return output

        if training:
            return random_rotated_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = {
            "factor": self.factor,
            "fill_mode": self.fill_mode,
            "fill_value": self.fill_value,
            "interpolation": self.interpolation,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
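

# A usage sketch for `RandomRotation` (illustrative): `factor=0.1` rotates
# each image by a random angle in `[-0.1 * 2pi, 0.1 * 2pi]`, i.e. up to
# about 36 degrees either way.
#
#     layer = RandomRotation(factor=0.1, fill_mode="reflect")
#     images = tf.random.uniform((4, 64, 64, 3))
#     out = layer(images)  # shape (4, 64, 64, 3)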


@keras_export(
    "keras.layers.RandomZoom",
    "keras.layers.experimental.preprocessing.RandomZoom",
    v1=[],
)
class RandomZoom(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly zooms images during training.

    This layer will randomly zoom in or out on each axis of an image
    independently, filling empty space according to `fill_mode`.

    Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
    and of integer or floating point dtype.
    By default, the layer will output floats.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Args:
        height_factor: a float represented as fraction of value, or a tuple
            of size 2 representing lower and upper bound for zooming
            vertically. When represented as a single float, this value is
            used for both the upper and lower bound. A positive value means
            zooming out, while a negative value means zooming in. For
            instance, `height_factor=(0.2, 0.3)` results in an output zoomed
            out by a random amount in the range `[+20%, +30%]`.
            `height_factor=(-0.3, -0.2)` results in an output zoomed in by a
            random amount in the range `[+20%, +30%]`.
        width_factor: a float represented as fraction of value, or a tuple
            of size 2 representing lower and upper bound for zooming
            horizontally. When represented as a single float, this value is
            used for both the upper and lower bound. For instance,
            `width_factor=(0.2, 0.3)` results in an output zoomed out by
            between 20% and 30%. `width_factor=(-0.3, -0.2)` results in an
            output zoomed in by between 20% and 30%. `None` means zooming
            vertically and horizontally by the same amount, thereby
            preserving the aspect ratio. Defaults to `None`.
        fill_mode: Points outside the boundaries of the input are filled
            according to the given mode
            (one of `{"constant", "reflect", "wrap", "nearest"}`).
            - *reflect*: `(d c b a | a b c d | d c b a)` The input is
              extended by reflecting about the edge of the last pixel.
            - *constant*: `(k k k k | a b c d | k k k k)` The input is
              extended by filling all values beyond the edge with the same
              constant value k = 0.
            - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended
              by wrapping around to the opposite edge.
            - *nearest*: `(a a a a | a b c d | d d d d)` The input is
              extended by the nearest pixel.
        interpolation: Interpolation mode. Supported values: `"nearest"`,
            `"bilinear"`.
        seed: Integer. Used to create a random seed.
        fill_value: a float represents the value to be filled outside
            the boundaries when `fill_mode="constant"`.

    Example:

    >>> input_img = np.random.random((32, 224, 224, 3))
    >>> layer = tf.keras.layers.RandomZoom(.5, .2)
    >>> out_img = layer(input_img)
    >>> out_img.shape
    TensorShape([32, 224, 224, 3])

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.
    """

    def __init__(
        self,
        height_factor,
        width_factor=None,
        fill_mode="reflect",
        interpolation="bilinear",
        seed=None,
        fill_value=0.0,
        **kwargs,
    ):
        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomZoom").set(
            True
        )
        super().__init__(seed=seed, force_generator=True, **kwargs)
        self.height_factor = height_factor
        if isinstance(height_factor, (tuple, list)):
            self.height_lower = height_factor[0]
            self.height_upper = height_factor[1]
        else:
            self.height_lower = -height_factor
            self.height_upper = height_factor

        if abs(self.height_lower) > 1.0 or abs(self.height_upper) > 1.0:
            raise ValueError(
                "`height_factor` argument must have values between [-1, 1]. "
                f"Received: height_factor={height_factor}"
            )

        self.width_factor = width_factor
        if width_factor is not None:
            if isinstance(width_factor, (tuple, list)):
                self.width_lower = width_factor[0]
                self.width_upper = width_factor[1]
            else:
                self.width_lower = -width_factor
                self.width_upper = width_factor

            if self.width_lower < -1.0 or self.width_upper < -1.0:
                raise ValueError(
                    "`width_factor` argument must have values larger than -1. "
                    f"Received: width_factor={width_factor}"
                )

        check_fill_mode_and_interpolation(fill_mode, interpolation)

        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.interpolation = interpolation
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs, self.compute_dtype)

        def random_zoomed_inputs(inputs):
            """Zoomed inputs with random ops."""
            original_shape = inputs.shape
            unbatched = inputs.shape.rank == 3
            # The transform op only accepts rank 4 inputs,
            # so if we have an unbatched image,
            # we need to temporarily expand dims to a batch.
            if unbatched:
                inputs = tf.expand_dims(inputs, 0)
            inputs_shape = tf.shape(inputs)
            batch_size = inputs_shape[0]
            img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32)
            img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32)
            height_zoom = self._random_generator.random_uniform(
                shape=[batch_size, 1],
                minval=1.0 + self.height_lower,
                maxval=1.0 + self.height_upper,
            )
            if self.width_factor is not None:
                width_zoom = self._random_generator.random_uniform(
                    shape=[batch_size, 1],
                    minval=1.0 + self.width_lower,
                    maxval=1.0 + self.width_upper,
                )
            else:
                width_zoom = height_zoom
            zooms = tf.cast(
                tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32
            )
            output = transform(
                inputs,
                get_zoom_matrix(zooms, img_hd, img_wd),
                fill_mode=self.fill_mode,
                fill_value=self.fill_value,
                interpolation=self.interpolation,
            )
            if unbatched:
                output = tf.squeeze(output, 0)
            output.set_shape(original_shape)
            return output

        if training:
            return random_zoomed_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = {
            "height_factor": self.height_factor,
            "width_factor": self.width_factor,
            "fill_mode": self.fill_mode,
            "fill_value": self.fill_value,
            "interpolation": self.interpolation,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))


def get_zoom_matrix(zooms, image_height, image_width, name=None):
    """Returns projective transform(s) for the given zoom(s).

    Args:
        zooms: A matrix of 2-element lists representing `[zx, zy]`
            to zoom for each image (for a batch of images).
        image_height: Height of the image(s) to be transformed.
        image_width: Width of the image(s) to be transformed.
        name: The name of the op.

    Returns:
        A tensor of shape `(num_images, 8)`. Projective transforms which can
        be given to operation `image_projective_transform_v2`.
        If one row of transforms is
        `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps the *output* point
        `(x, y)` to a transformed *input* point
        `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
        where `k = c0 x + c1 y + 1`.
    """
    with backend.name_scope(name or "zoom_matrix"):
        num_zooms = tf.shape(zooms)[0]
        # The zoom matrix looks like:
        #     [[zx 0 0]
        #      [0 zy 0]
        #      [0 0 1]]
        # where the last entry is implicit.
        # Zoom matrices are always float32.
        x_offset = ((image_width - 1.0) / 2.0) * (1.0 - zooms[:, 0, None])
        y_offset = ((image_height - 1.0) / 2.0) * (1.0 - zooms[:, 1, None])
        return tf.concat(
            values=[
                zooms[:, 0, None],
                tf.zeros((num_zooms, 1), tf.float32),
                x_offset,
                tf.zeros((num_zooms, 1), tf.float32),
                zooms[:, 1, None],
                y_offset,
                tf.zeros((num_zooms, 2), tf.float32),
            ],
            axis=1,
        )
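

# A worked example for `get_zoom_matrix` (illustrative): a uniform zoom
# `[zx, zy] = [2.0, 2.0]` on a 5x5 image keeps the center fixed, with
# offsets `((5 - 1) / 2) * (1 - 2) = -2` on both axes.
#
#     get_zoom_matrix(tf.constant([[2.0, 2.0]]), 5.0, 5.0)
#     # -> [[2., 0., -2., 0., 2., -2., 0., 0.]]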


@keras_export(
    "keras.layers.RandomContrast",
    "keras.layers.experimental.preprocessing.RandomContrast",
    v1=[],
)
class RandomContrast(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly adjusts contrast during training.

    This layer will randomly adjust the contrast of an image or images
    by a random factor. Contrast is adjusted independently
    for each channel of each image during training.

    For each channel, this layer computes the mean of the image pixels in
    the channel and then adjusts each component `x` of each pixel to
    `(x - mean) * contrast_factor + mean`.

    Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
    and of integer or floating point dtype.
    By default, the layer will output floats.
    The output value will be clipped to the range `[0, 255]`, the valid
    range of RGB colors.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Args:
        factor: a positive float represented as fraction of value, or a
            tuple of size 2 representing lower and upper bound.
            When represented as a single float, lower = upper.
            The contrast factor will be randomly picked between
            `[1.0 - lower, 1.0 + upper]`. For any pixel x in the channel,
            the output will be `(x - mean) * factor + mean`
            where `mean` is the mean value of the channel.
        seed: Integer. Used to create a random seed.
    """

    def __init__(self, factor, seed=None, **kwargs):
        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomContrast").set(
            True
        )
        super().__init__(seed=seed, force_generator=True, **kwargs)
        self.factor = factor
        if isinstance(factor, (tuple, list)):
            self.lower = factor[0]
            self.upper = factor[1]
        else:
            self.lower = self.upper = factor
        if self.lower < 0.0 or self.upper < 0.0 or self.lower > 1.0:
            raise ValueError(
                "`factor` argument cannot have negative values or values "
                "greater than 1. "
                f"Received: factor={factor}"
            )
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs, self.compute_dtype)

        def random_contrasted_inputs(inputs):
            seed = self._random_generator.make_seed_for_stateless_op()
            if seed is not None:
                output = tf.image.stateless_random_contrast(
                    inputs, 1.0 - self.lower, 1.0 + self.upper, seed=seed
                )
            else:
                output = tf.image.random_contrast(
                    inputs,
                    1.0 - self.lower,
                    1.0 + self.upper,
                    seed=self._random_generator.make_legacy_seed(),
                )
            output = tf.clip_by_value(output, 0, 255)
            output.set_shape(inputs.shape)
            return output

        if training:
            return random_contrasted_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = {
            "factor": self.factor,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
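

# A usage sketch for `RandomContrast` (illustrative): `factor=0.2` draws a
# contrast factor from `[0.8, 1.2]` and pushes pixels toward or away from
# each channel's mean.
#
#     layer = RandomContrast(factor=0.2, seed=42)
#     images = tf.random.uniform((4, 32, 32, 3), maxval=255.0)
#     out = layer(images)  # same shape, values clipped to [0, 255]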


@keras_export("keras.layers.RandomBrightness", v1=[])
class RandomBrightness(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly adjusts brightness during training.

    This layer will randomly increase/reduce the brightness for the input
    RGB images. At inference time, the output will be identical to the
    input. Call the layer with `training=True` to adjust the brightness of
    the input.

    Note that different brightness adjustment factors
    will be applied to each image in the batch.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Args:
        factor: Float or a list/tuple of 2 floats between -1.0 and 1.0. The
            factor is used to determine the lower bound and upper bound of
            the brightness adjustment. A float value will be chosen randomly
            between the limits. When -1.0 is chosen, the output image will
            be black, and when 1.0 is chosen, the image will be fully white.
            When only one float is provided, eg, 0.2,
            then -0.2 will be used for lower bound and 0.2
            will be used for upper bound.
        value_range: Optional list/tuple of 2 floats
            for the lower and upper limit
            of the values of the input data.
            To make no change, use `[0.0, 1.0]`, e.g., if the image input
            has been scaled before this layer. Defaults to `[0.0, 255.0]`.
            The brightness adjustment will be scaled to this range, and the
            output values will be clipped to this range.
        seed: optional integer, for fixed RNG behavior.

    Inputs: 3D (HWC) or 4D (NHWC) tensor, with float or int dtype. Input
    pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`).

    Output: 3D (HWC) or 4D (NHWC) tensor with brightness adjusted based on
    the `factor`. By default, the layer will output floats.
    The output value will be clipped to the range `[0, 255]`,
    the valid range of RGB colors, and
    rescaled based on the `value_range` if needed.

    Sample usage:

    ```python
    random_bright = tf.keras.layers.RandomBrightness(factor=0.2)

    # An image with shape [2, 2, 3]
    image = [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]

    # Assume we randomly select the factor to be 0.1, then it will apply
    # 0.1 * 255 to all the channels
    output = random_bright(image, training=True)

    # output will be a float32 tensor with 25.5 added to each channel:
    tf.Tensor([[[26.5, 27.5, 28.5]
                [29.5, 30.5, 31.5]]
               [[32.5, 33.5, 34.5]
                [35.5, 36.5, 37.5]]],
              shape=(2, 2, 3), dtype=float32)
    ```
    """

    _FACTOR_VALIDATION_ERROR = (
        "The `factor` argument should be a number (or a list of two numbers) "
        "in the range [-1.0, 1.0]. "
    )
    _VALUE_RANGE_VALIDATION_ERROR = (
        "The `value_range` argument should be a list of two numbers. "
    )

    def __init__(self, factor, value_range=(0, 255), seed=None, **kwargs):
        base_preprocessing_layer.keras_kpl_gauge.get_cell(
            "RandomBrightness"
        ).set(True)
        super().__init__(seed=seed, force_generator=True, **kwargs)
        self._set_factor(factor)
        self._set_value_range(value_range)
        self._seed = seed

    def _set_value_range(self, value_range):
        if not isinstance(value_range, (tuple, list)):
            raise ValueError(
                self._VALUE_RANGE_VALIDATION_ERROR + f"Got {value_range}"
            )
        if len(value_range) != 2:
            raise ValueError(
                self._VALUE_RANGE_VALIDATION_ERROR + f"Got {value_range}"
            )
        self._value_range = sorted(value_range)

    def _set_factor(self, factor):
        if isinstance(factor, (tuple, list)):
            if len(factor) != 2:
                raise ValueError(
                    self._FACTOR_VALIDATION_ERROR + f"Got {factor}"
                )
            self._check_factor_range(factor[0])
            self._check_factor_range(factor[1])
            self._factor = sorted(factor)
        elif isinstance(factor, (int, float)):
            self._check_factor_range(factor)
            factor = abs(factor)
            self._factor = [-factor, factor]
        else:
            raise ValueError(self._FACTOR_VALIDATION_ERROR + f"Got {factor}")

    def _check_factor_range(self, input_number):
        if input_number > 1.0 or input_number < -1.0:
            raise ValueError(
                self._FACTOR_VALIDATION_ERROR + f"Got {input_number}"
            )

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs, dtype=self.compute_dtype)
        if training:
            return self._brightness_adjust(inputs)
        else:
            return inputs

    def _brightness_adjust(self, images):
        rank = images.shape.rank
        if rank == 3:
            rgb_delta_shape = (1, 1, 1)
        elif rank == 4:
            # Keep only the batch dim. This will ensure to have same
            # adjustment within one image, but different across the images.
            rgb_delta_shape = [tf.shape(images)[0], 1, 1, 1]
        else:
            raise ValueError(
                "Expected the input image to be rank 3 or 4. Got "
                f"inputs.shape = {images.shape}"
            )
        rgb_delta = self._random_generator.random_uniform(
            shape=rgb_delta_shape,
            minval=self._factor[0],
            maxval=self._factor[1],
        )
        rgb_delta = rgb_delta * (self._value_range[1] - self._value_range[0])
        rgb_delta = tf.cast(rgb_delta, images.dtype)
        images += rgb_delta
        return tf.clip_by_value(
            images, self._value_range[0], self._value_range[1]
        )

    def get_config(self):
        config = {
            "factor": self._factor,
            "value_range": self._value_range,
            "seed": self._seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))


@keras_export(
    "keras.layers.RandomHeight",
    "keras.layers.experimental.preprocessing.RandomHeight",
    v1=[],
)
class RandomHeight(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly varies image height during training.

    This layer adjusts the height of a batch of images by a random factor.
    The input should be a 3D (unbatched) or 4D (batched) tensor in the
    `"channels_last"` image data format. Input pixel values can be of any
    range (e.g. `[0., 1.)` or `[0, 255]`) and of integer or floating point
    dtype. By default, the layer will output floats.

    By default, this layer is inactive during inference.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Args:
        factor: A positive float (fraction of original height),
            or a tuple of size 2 representing lower and upper bound
            for resizing vertically. When represented as a single float,
            this value is used for both the upper and
            lower bound. For instance, `factor=(0.2, 0.3)` results
            in an output with
            height changed by a random amount in the range `[20%, 30%]`.
            `factor=(-0.2, 0.3)` results in an output with height
            changed by a random amount in the range `[-20%, +30%]`.
            `factor=0.2` results in an output with
            height changed by a random amount in the range `[-20%, +20%]`.
        interpolation: String, the interpolation method.
            Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`,
            `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`.
            Defaults to `"bilinear"`.
        seed: Integer. Used to create a random seed.

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., random_height, width, channels)`.
    """

    def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs):
        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomHeight").set(
            True
        )
        super().__init__(seed=seed, force_generator=True, **kwargs)
        self.factor = factor
        if isinstance(factor, (tuple, list)):
            self.height_lower = factor[0]
            self.height_upper = factor[1]
        else:
            self.height_lower = -factor
            self.height_upper = factor

        if self.height_upper < self.height_lower:
            raise ValueError(
                "`factor` argument cannot have an upper bound less than the "
                f"lower bound. Received: factor={factor}"
            )
        if self.height_lower < -1.0 or self.height_upper < -1.0:
            raise ValueError(
                "`factor` argument must have values larger than -1. "
                f"Received: factor={factor}"
            )
        self.interpolation = interpolation
        self._interpolation_method = image_utils.get_interpolation(
            interpolation
        )
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs)

        def random_height_inputs(inputs):
            """Inputs height-adjusted with random ops."""
            inputs_shape = tf.shape(inputs)
            img_hd = tf.cast(inputs_shape[H_AXIS], tf.float32)
            img_wd = inputs_shape[W_AXIS]
            height_factor = self._random_generator.random_uniform(
                shape=[],
                minval=(1.0 + self.height_lower),
                maxval=(1.0 + self.height_upper),
            )
            adjusted_height = tf.cast(height_factor * img_hd, tf.int32)
            adjusted_size = tf.stack([adjusted_height, img_wd])
            output = tf.image.resize(
                images=inputs,
                size=adjusted_size,
                method=self._interpolation_method,
            )
            # tf.image.resize will output float32 regardless of input type.
            output = tf.cast(output, self.compute_dtype)
            output_shape = inputs.shape.as_list()
            output_shape[H_AXIS] = None
            output.set_shape(output_shape)
            return output

        if training:
            return random_height_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape).as_list()
        input_shape[H_AXIS] = None
        return tf.TensorShape(input_shape)

    def get_config(self):
        config = {
            "factor": self.factor,
            "interpolation": self.interpolation,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
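

# A usage sketch for `RandomHeight` (illustrative): with `factor=0.2`, the
# output height is drawn from `[0.8 * h, 1.2 * h]` and the static height
# dimension becomes `None`.
#
#     layer = RandomHeight(factor=0.2, seed=42)
#     images = tf.random.uniform((4, 100, 80, 3))
#     out = layer(images)  # shape (4, h', 80, 3) with h' in [80, 120]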


@keras_export(
    "keras.layers.RandomWidth",
    "keras.layers.experimental.preprocessing.RandomWidth",
    v1=[],
)
class RandomWidth(base_layer.BaseRandomLayer):
    """A preprocessing layer which randomly varies image width during training.

    This layer adjusts the width of a batch of images by a random factor.
    The input should be a 3D (unbatched) or 4D (batched) tensor in the
    `"channels_last"` image data format. Input pixel values can be of any
    range (e.g. `[0., 1.)` or `[0, 255]`) and of integer or floating point
    dtype. By default, the layer will output floats.

    By default, this layer is inactive during inference.

    For an overview and full list of preprocessing layers, see the preprocessing
    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).

    Args:
        factor: A positive float (fraction of original width),
            or a tuple of size 2 representing lower and upper bound
            for resizing horizontally. When represented as a single float,
            this value is used for both the upper and
            lower bound. For instance, `factor=(0.2, 0.3)`
            results in an output with
            width changed by a random amount in the range `[20%, 30%]`.
            `factor=(-0.2, 0.3)` results in an output with width changed
            by a random amount in the range `[-20%, +30%]`.
            `factor=0.2` results in an output with width changed
            by a random amount in the range `[-20%, +20%]`.
        interpolation: String, the interpolation method.
            Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`,
            `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`.
            Defaults to `"bilinear"`.
        seed: Integer. Used to create a random seed.

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `"channels_last"` format.

    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, random_width, channels)`.
    """

    def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs):
        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomWidth").set(
            True
        )
        super().__init__(seed=seed, force_generator=True, **kwargs)
        self.factor = factor
        if isinstance(factor, (tuple, list)):
            self.width_lower = factor[0]
            self.width_upper = factor[1]
        else:
            self.width_lower = -factor
            self.width_upper = factor
        if self.width_upper < self.width_lower:
            raise ValueError(
                "`factor` argument cannot have an upper bound less than the "
                f"lower bound. Received: factor={factor}"
            )
        if self.width_lower < -1.0 or self.width_upper < -1.0:
            raise ValueError(
                "`factor` argument must have values larger than -1. "
                f"Received: factor={factor}"
            )
        self.interpolation = interpolation
        self._interpolation_method = image_utils.get_interpolation(
            interpolation
        )
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = convert_inputs(inputs)

        def random_width_inputs(inputs):
            """Inputs width-adjusted with random ops."""
            inputs_shape = tf.shape(inputs)
            img_hd = inputs_shape[H_AXIS]
            img_wd = tf.cast(inputs_shape[W_AXIS], tf.float32)
            width_factor = self._random_generator.random_uniform(
                shape=[],
                minval=(1.0 + self.width_lower),
                maxval=(1.0 + self.width_upper),
            )
            adjusted_width = tf.cast(width_factor * img_wd, tf.int32)
            adjusted_size = tf.stack([img_hd, adjusted_width])
            output = tf.image.resize(
                images=inputs,
                size=adjusted_size,
                method=self._interpolation_method,
            )
            # tf.image.resize will output float32 regardless of input type.
            output = tf.cast(output, self.compute_dtype)
            output_shape = inputs.shape.as_list()
            output_shape[W_AXIS] = None
            output.set_shape(output_shape)
            return output

        if training:
            return random_width_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape).as_list()
        input_shape[W_AXIS] = None
        return tf.TensorShape(input_shape)

    def get_config(self):
        config = {
            "factor": self.factor,
            "interpolation": self.interpolation,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
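

# A usage sketch for `RandomWidth` (illustrative), mirroring `RandomHeight`
# along the width axis:
#
#     layer = RandomWidth(factor=0.2, seed=42)
#     images = tf.random.uniform((4, 100, 80, 3))
#     out = layer(images)  # shape (4, 100, w', 3) with w' in [64, 96]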


def convert_inputs(inputs, dtype=None):
    if isinstance(inputs, dict):
        raise ValueError(
            "This layer can only process a tensor representing an image or "
            f"a batch of images. Received: type(inputs)={type(inputs)}. "
            "If you need to pass a dict containing "
            "images, labels, and bounding boxes, you should "
            "instead use the preprocessing and augmentation layers "
            "from `keras_cv.layers`. See docs at "
            "https://keras.io/api/keras_cv/layers/"
        )
    inputs = utils.ensure_tensor(inputs, dtype=dtype)
    return inputs