Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorboard/plugins/audio/summary.py: 19%
57 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Audio summaries and TensorFlow operations to create them.

An audio summary stores a rank-2 string tensor of shape `[k, 2]`, where
`k` is the number of audio clips recorded in the summary. Each row of
the tensor is a pair `[encoded_audio, label]`, where `encoded_audio` is
a binary string whose encoding is specified in the summary metadata, and
`label` is a UTF-8 encoded Markdown string describing the audio clip.

NOTE: This module is in beta, and its API is subject to change, but the
data that it stores to disk will be supported forever.
25"""
28import functools
29import warnings
31import numpy as np
33from tensorboard.util import encoder as encoder_util
34from tensorboard.plugins.audio import metadata
35from tensorboard.plugins.audio import summary_v2
# Re-export the V2 audio summary API under this module's namespace.
audio = summary_v2.audio


# Warning text emitted by `op` and `pb` whenever a caller passes the
# deprecated `labels` argument.
_LABELS_WARNING = (
    "Labels on audio summaries are deprecated and will be removed. "
    "See <https://github.com/tensorflow/tensorboard/issues/3513>."
)
def op(
    name,
    audio,
    sample_rate,
    labels=None,
    max_outputs=3,
    encoding=None,
    display_name=None,
    description=None,
    collections=None,
):
    """Build a legacy audio summary op for a TensorFlow (v1-style) graph.

    Arguments:
      name: A unique name for the generated summary node.
      audio: A `Tensor` of audio data with shape `[k, t, c]`: `k` audio
        clips, `t` frames per clip, and `c` channels. Elements should be
        floating-point values in `[-1.0, 1.0]`. Any dimension may be
        statically unknown (i.e., `None`).
      sample_rate: An `int` or rank-0 `int32` `Tensor` giving the sample
        rate, in Hz. Must be positive.
      labels: Deprecated. Do not set. Passing a value emits a warning.
      max_outputs: Optional `int` or rank-0 integer `Tensor`. At most
        this many clips are emitted at each step; when more are
        provided, only the first `max_outputs` are used and the rest
        are silently discarded.
      encoding: A constant `str` (not string tensor) naming the desired
        encoding. Only "wav" is accepted; `None` selects the default.
        Please see the "API compatibility note" below.
      display_name: Optional name for this summary in TensorBoard, as a
        constant `str`. Defaults to `name`.
      description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.
      collections: Optional list of graph collections keys. The new
        summary op is added to these collections. Defaults to
        `[GraphKeys.SUMMARIES]`.

    Returns:
      A TensorFlow summary op.

    Raises:
      ValueError: If `encoding` is neither `None` nor "wav".

    API compatibility note: The default value of the `encoding`
    argument is _not_ guaranteed to remain unchanged across TensorBoard
    versions. In the future, we will by default encode as FLAC instead of
    as WAV. If the specific format is important to you, please provide a
    file format explicitly.
    """
    if labels is not None:
        warnings.warn(_LABELS_WARNING)

    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    summary_display_name = name if display_name is None else display_name
    requested_encoding = "wav" if encoding is None else encoding

    # WAV is the only supported format; reject anything else up front.
    if requested_encoding != "wav":
        raise ValueError("Unknown encoding: %r" % requested_encoding)
    encoding_enum = metadata.Encoding.Value("WAV")
    encode_clip = functools.partial(
        tf.audio.encode_wav, sample_rate=sample_rate
    )

    with tf.name_scope(name):
        # The rank assertion is created inside the name scope and gates
        # every op built below.
        rank_check = tf.assert_rank(audio, 3)
        with tf.control_dependencies([rank_check]):
            clips = audio[:max_outputs]
            encoded_clips = tf.map_fn(
                encode_clip,
                clips,
                dtype=tf.string,
                name="encode_each_audio",
            )
            if labels is None:
                # One empty label per emitted clip.
                clip_labels = tf.tile([""], tf.shape(input=clips)[:1])
            else:
                clip_labels = labels[:max_outputs]
            # Stack to [2, k], then transpose so each row is
            # `[encoded_audio, label]`.
            stacked = tf.stack([encoded_clips, clip_labels])
            tensor = tf.transpose(a=stacked)
            summary_metadata = metadata.create_summary_metadata(
                display_name=summary_display_name,
                description=description,
                encoding=encoding_enum,
            )
            return tf.summary.tensor_summary(
                name="audio_summary",
                tensor=tensor,
                collections=collections,
                summary_metadata=summary_metadata,
            )
def pb(
    name,
    audio,
    sample_rate,
    labels=None,
    max_outputs=3,
    encoding=None,
    display_name=None,
    description=None,
):
    """Create a legacy audio summary protobuf.

    This behaves as if you were to create an `op` with the same arguments
    (wrapped with constant tensors where appropriate) and then execute
    that summary op in a TensorFlow session.

    Arguments:
      name: A unique name for the generated summary node.
      audio: An `np.array` representing audio data with shape `[k, t, c]`,
        where `k` is the number of audio clips, `t` is the number of
        frames, and `c` is the number of channels. Elements should be
        floating-point values in `[-1.0, 1.0]`.
      sample_rate: An `int` that represents the sample rate, in Hz.
        Must be positive.
      labels: Deprecated. Do not set. Passing a value emits a warning.
      max_outputs: Optional `int`. At most this many audio clips will be
        emitted. When more than `max_outputs` many clips are provided, the
        first `max_outputs` many clips will be used and the rest silently
        discarded.
      encoding: A constant `str` indicating the desired encoding. You
        can choose any format you like, as long as it's "wav". Please see
        the "API compatibility note" below.
      display_name: Optional name for this summary in TensorBoard, as a
        `str`. Defaults to `name`.
      description: Optional long-form description for this summary, as a
        `str`. Markdown is supported. Defaults to empty.

    Returns:
      A `tf.Summary` protobuf object.

    Raises:
      ValueError: If `audio` does not have rank 3, or if `encoding` is
        neither `None` nor "wav".

    API compatibility note: The default value of the `encoding`
    argument is _not_ guaranteed to remain unchanged across TensorBoard
    versions. In the future, we will by default encode as FLAC instead of
    as WAV. If the specific format is important to you, please provide a
    file format explicitly.
    """
    if labels is not None:
        warnings.warn(_LABELS_WARNING)

    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    audio = np.array(audio)
    if audio.ndim != 3:
        raise ValueError("Shape %r must have rank 3" % (audio.shape,))
    if encoding is None:
        encoding = "wav"

    if encoding == "wav":
        encoding = metadata.Encoding.Value("WAV")
        encoder = functools.partial(
            encoder_util.encode_wav, samples_per_second=sample_rate
        )
    else:
        raise ValueError("Unknown encoding: %r" % encoding)

    limited_audio = audio[:max_outputs]
    if labels is None:
        limited_labels = [b""] * len(limited_audio)
    else:
        limited_labels = [
            tf.compat.as_bytes(label) for label in labels[:max_outputs]
        ]

    encoded_audio = [encoder(a) for a in limited_audio]
    # Build an object array rather than letting NumPy infer a fixed-width
    # bytes ("S") dtype: NumPy strips trailing NUL bytes from "S" elements
    # when they are read back, which would truncate any encoded clip whose
    # payload ends in zero bytes (e.g. audio that ends in silence).
    content = np.array(
        [encoded_audio, limited_labels], dtype=object
    ).transpose()
    tensor = tf.make_tensor_proto(content, dtype=tf.string)

    if display_name is None:
        display_name = name
    summary_metadata = metadata.create_summary_metadata(
        display_name=display_name, description=description, encoding=encoding
    )
    # Round-trip through serialization to convert TensorBoard's metadata
    # proto into TensorFlow's `SummaryMetadata` class.
    tf_summary_metadata = tf.SummaryMetadata.FromString(
        summary_metadata.SerializeToString()
    )

    summary = tf.Summary()
    summary.value.add(
        tag="%s/audio_summary" % name,
        metadata=tf_summary_metadata,
        tensor=tensor,
    )
    return summary