Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorboard/plugins/audio/summary.py: 19%

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14# ==============================================================================

"""Audio summaries and TensorFlow operations to create them.

An audio summary stores a rank-2 string tensor of shape `[k, 2]`, where
`k` is the number of audio clips recorded in the summary. Each row of
the tensor is a pair `[encoded_audio, label]`, where `encoded_audio` is
a binary string whose encoding is specified in the summary metadata, and
`label` is a UTF-8 encoded Markdown string describing the audio clip.

NOTE: This module is in beta, and its API is subject to change, but the
data that it stores to disk will be supported forever.
"""

28import functools

29import warnings

31import numpy as np

33from tensorboard.util import encoder as encoder_util

34from tensorboard.plugins.audio import metadata

35from tensorboard.plugins.audio import summary_v2

# Re-export the V2 summary function so it is reachable from this module.
audio = summary_v2.audio

# Message passed to `warnings.warn` by `op` and `pb` whenever a caller
# supplies the deprecated `labels` argument.
_LABELS_WARNING = (
    "Labels on audio summaries are deprecated and will be removed. "
    + "See <https://github.com/tensorflow/tensorboard/issues/3513>."
)

def op(
    name,
    audio,
    sample_rate,
    labels=None,
    max_outputs=3,
    encoding=None,
    display_name=None,
    description=None,
    collections=None,
):
    """Build a legacy audio summary op for a TensorFlow graph.

    Arguments:
      name: A unique name for the generated summary node.
      audio: A `Tensor` of audio data with shape `[k, t, c]`, where `k`
        is the number of clips, `t` the number of frames, and `c` the
        number of channels. Elements should be floating-point values in
        `[-1.0, 1.0]`. Any dimension may be statically unknown (`None`).
      sample_rate: An `int` or rank-0 `int32` `Tensor` giving the sample
        rate in Hz. Must be positive.
      labels: Deprecated. Do not set. Passing a value emits a warning.
      max_outputs: Optional `int` or rank-0 integer `Tensor`. Only the
        first `max_outputs` clips are emitted at each step; any further
        clips are silently discarded.
      encoding: A constant `str` (not a string tensor) naming the desired
        encoding. Only "wav" is accepted; `None` selects the default.
        See the "API compatibility note" below.
      display_name: Optional name for this summary in TensorBoard, as a
        constant `str`. Defaults to `name`.
      description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.
      collections: Optional list of graph collections keys. The new
        summary op is added to these collections. Defaults to
        `[GraphKeys.SUMMARIES]`.

    Returns:
      A TensorFlow summary op.

    Raises:
      ValueError: If `encoding` is neither `None` nor "wav".

    API compatibility note: The default value of the `encoding`
    argument is _not_ guaranteed to remain unchanged across TensorBoard
    versions. In the future, we will by default encode as FLAC instead of
    as WAV. If the specific format is important to you, please provide a
    file format explicitly.
    """
    if labels is not None:
        warnings.warn(_LABELS_WARNING)

    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    display_name = name if display_name is None else display_name

    # Resolve the textual encoding; "wav" is the only supported choice.
    requested_encoding = "wav" if encoding is None else encoding
    if requested_encoding != "wav":
        raise ValueError("Unknown encoding: %r" % requested_encoding)
    encoding_enum = metadata.Encoding.Value("WAV")
    encode_clip = functools.partial(
        tf.audio.encode_wav, sample_rate=sample_rate
    )

    with tf.name_scope(name):
        # The rank assertion is created inside the name scope and then
        # attached as a control dependency to everything built below.
        rank_check = tf.assert_rank(audio, 3)
        with tf.control_dependencies([rank_check]):
            clips = audio[:max_outputs]
            encoded_clips = tf.map_fn(
                encode_clip,
                clips,
                dtype=tf.string,
                name="encode_each_audio",
            )
            if labels is not None:
                clip_labels = labels[:max_outputs]
            else:
                # One empty label per emitted clip.
                clip_labels = tf.tile([""], tf.shape(input=clips)[:1])
            # Stack to shape [2, k], then transpose into [k, 2] rows of
            # (encoded_audio, label).
            stacked = tf.stack([encoded_clips, clip_labels])
            summary_tensor = tf.transpose(a=stacked)
            plugin_metadata = metadata.create_summary_metadata(
                display_name=display_name,
                description=description,
                encoding=encoding_enum,
            )
            return tf.summary.tensor_summary(
                name="audio_summary",
                tensor=summary_tensor,
                collections=collections,
                summary_metadata=plugin_metadata,
            )

137

138

def pb(
    name,
    audio,
    sample_rate,
    labels=None,
    max_outputs=3,
    encoding=None,
    display_name=None,
    description=None,
):
    """Create a legacy audio summary protobuf.

    This behaves as if you were to create an `op` with the same arguments
    (wrapped with constant tensors where appropriate) and then execute
    that summary op in a TensorFlow session.

    Arguments:
      name: A unique name for the generated summary node.
      audio: An `np.array` representing audio data with shape `[k, t, c]`,
        where `k` is the number of audio clips, `t` is the number of
        frames, and `c` is the number of channels. Elements should be
        floating-point values in `[-1.0, 1.0]`.
      sample_rate: An `int` that represents the sample rate, in Hz.
        Must be positive.
      labels: Deprecated. Do not set. Passing a value emits a warning.
      max_outputs: Optional `int`. At most this many audio clips will be
        emitted. When more than `max_outputs` many clips are provided, the
        first `max_outputs` many clips will be used and the rest silently
        discarded.
      encoding: A constant `str` indicating the desired encoding. You
        can choose any format you like, as long as it's "wav". Please see
        the "API compatibility note" below.
      display_name: Optional name for this summary in TensorBoard, as a
        `str`. Defaults to `name`.
      description: Optional long-form description for this summary, as a
        `str`. Markdown is supported. Defaults to empty.

    Returns:
      A `tf.Summary` protobuf object.

    Raises:
      ValueError: If `audio` does not have rank 3, or if `encoding` is
        neither `None` nor "wav".

    API compatibility note: The default value of the `encoding`
    argument is _not_ guaranteed to remain unchanged across TensorBoard
    versions. In the future, we will by default encode as FLAC instead of
    as WAV. If the specific format is important to you, please provide a
    file format explicitly.
    """
    if labels is not None:
        warnings.warn(_LABELS_WARNING)

    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    audio = np.array(audio)
    if audio.ndim != 3:
        raise ValueError("Shape %r must have rank 3" % (audio.shape,))
    if encoding is None:
        encoding = "wav"

    if encoding == "wav":
        encoding = metadata.Encoding.Value("WAV")
        encoder = functools.partial(
            encoder_util.encode_wav, samples_per_second=sample_rate
        )
    else:
        raise ValueError("Unknown encoding: %r" % encoding)

    limited_audio = audio[:max_outputs]
    if labels is None:
        limited_labels = [b""] * len(limited_audio)
    else:
        limited_labels = [
            tf.compat.as_bytes(label) for label in labels[:max_outputs]
        ]

    encoded_audio = [encoder(a) for a in limited_audio]
    # Use an object array rather than letting NumPy infer a fixed-width
    # bytes ("S") dtype: elements read back from an "S" array have their
    # trailing NUL bytes stripped, which would truncate encoded audio
    # whose final bytes are zero (e.g. a clip that ends in silence).
    content = np.array(
        [encoded_audio, limited_labels], dtype=object
    ).transpose()
    tensor = tf.make_tensor_proto(content, dtype=tf.string)

    if display_name is None:
        display_name = name
    summary_metadata = metadata.create_summary_metadata(
        display_name=display_name, description=description, encoding=encoding
    )
    # Round-trip through the serialized form to obtain TensorFlow's own
    # `SummaryMetadata` message class from the one built by `metadata`.
    tf_summary_metadata = tf.SummaryMetadata.FromString(
        summary_metadata.SerializeToString()
    )

    summary = tf.Summary()
    summary.value.add(
        tag="%s/audio_summary" % name,
        metadata=tf_summary_metadata,
        tensor=tensor,
    )
    return summary

232 return summary