Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorboard/plugins/audio/summary_v2.py: 21%
28 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Audio summaries and TensorFlow operations to create them, V2 versions.
17An audio summary stores a rank-2 string tensor of shape `[k, 2]`, where
18`k` is the number of audio clips recorded in the summary. Each row of
19the tensor is a pair `[encoded_audio, label]`, where `encoded_audio` is
20a binary string whose encoding is specified in the summary metadata, and
21`label` is a UTF-8 encoded Markdown string describing the audio clip.
22"""
25import functools
27from tensorboard.compat import tf2 as tf
28from tensorboard.plugins.audio import metadata
29from tensorboard.util import lazy_tensor_creator
def audio(
    name,
    data,
    sample_rate,
    step=None,
    max_outputs=3,
    encoding=None,
    description=None,
):
    """Write an audio summary.

    Arguments:
      name: A name for this summary. The summary tag used for TensorBoard will
        be this name prefixed by any active name scopes.
      data: A `Tensor` representing audio data with shape `[k, t, c]`,
        where `k` is the number of audio clips, `t` is the number of
        frames, and `c` is the number of channels. Elements should be
        floating-point values in `[-1.0, 1.0]`. Any of the dimensions may
        be statically unknown (i.e., `None`).
      sample_rate: An `int` or rank-0 `int32` `Tensor` that represents the
        sample rate, in Hz. Must be positive.
      step: Explicit `int64`-castable monotonic step value for this summary. If
        omitted, this defaults to `tf.summary.experimental.get_step()`, which must
        not be None.
      max_outputs: Optional `int` or rank-0 integer `Tensor`. At most this
        many audio clips will be emitted at each step. When more than
        `max_outputs` many clips are provided, the first `max_outputs`
        many clips will be used and the rest silently discarded.
      encoding: Optional constant `str` for the desired encoding. Only "wav"
        is currently supported, but this is not guaranteed to remain the
        default, so if you want "wav" in particular, set this explicitly.
      description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.

    Returns:
      True on success, or false if no summary was emitted because no default
      summary writer was available.

    Raises:
      ValueError: if a default writer exists, but no step was provided and
        `tf.summary.experimental.get_step()` is None.
    """
    # Prefer the public `tf.audio` namespace; fall back to the generated
    # ops module for older TF builds that predate it.
    audio_ops = getattr(tf, "audio", None)
    if audio_ops is None:
        from tensorflow.python.ops import gen_audio_ops as audio_ops

    if encoding is None:
        encoding = "wav"
    if encoding != "wav":
        raise ValueError("Unknown encoding: %r" % encoding)

    summary_metadata = metadata.create_summary_metadata(
        display_name=None,
        description=description,
        encoding=metadata.Encoding.Value("WAV"),
    )

    # TODO(https://github.com/tensorflow/tensorboard/issues/2109): remove fallback
    summary_scope = getattr(tf.summary.experimental, "summary_scope", None)
    if summary_scope is None:
        summary_scope = tf.summary.summary_scope

    scope_inputs = [data, sample_rate, max_outputs, step]
    with summary_scope(name, "audio_summary", values=scope_inputs) as (tag, _):
        # Wrap the encoding work in a LazyTensorCreator and hand it to
        # write() as a callable, so the (potentially expensive) WAV
        # encoding only happens when a summary is actually recorded.
        @lazy_tensor_creator.LazyTensorCreator
        def encoded_summary_tensor():
            tf.debugging.assert_rank(data, 3)
            tf.debugging.assert_non_negative(max_outputs)
            clipped = data[:max_outputs]
            encoded = tf.map_fn(
                lambda clip: audio_ops.encode_wav(
                    clip, sample_rate=sample_rate
                ),
                clipped,
                dtype=tf.string,
                name="encode_each_audio",
            )
            # map_fn yields a float-dtype tensor when `clipped` is empty;
            # force the result back to an empty string tensor in that case.
            encoded = tf.cond(
                tf.shape(input=encoded)[0] > 0,
                lambda: encoded,
                lambda: tf.constant([], tf.string),
            )
            labels = tf.tile([""], tf.shape(input=clipped)[:1])
            return tf.transpose(a=tf.stack([encoded, labels]))

        # Passing the callable (rather than an eager tensor) defers all of
        # the above graph work until summaries are actually written.
        return tf.summary.write(
            tag=tag,
            tensor=encoded_summary_tensor,
            step=step,
            metadata=summary_metadata,
        )