Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorboard/plugins/audio/summary.py: 19%

57 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Audio summaries and TensorFlow operations to create them. 

16 

17An audio summary stores a rank-2 string tensor of shape `[k, 2]`, where 

18`k` is the number of audio clips recorded in the summary. Each row of 

19the tensor is a pair `[encoded_audio, label]`, where `encoded_audio` is 

20a binary string whose encoding is specified in the summary metadata, and 

21`label` is a UTF-8 encoded Markdown string describing the audio clip. 

22 

23NOTE: This module is in beta, and its API is subject to change, but the 

24data that it stores to disk will be supported forever. 

25""" 

26 

27 

28import functools 

29import warnings 

30 

31import numpy as np 

32 

33from tensorboard.util import encoder as encoder_util 

34from tensorboard.plugins.audio import metadata 

35from tensorboard.plugins.audio import summary_v2 

36 

37 

# Re-export the V2 `audio` summary function under this module's namespace.
audio = summary_v2.audio


# Warning text issued by `op` and `pb` whenever a caller supplies the
# deprecated `labels` argument.
_LABELS_WARNING = (
    "Labels on audio summaries are deprecated and will be removed. "
    "See <https://github.com/tensorflow/tensorboard/issues/3513>."
)

46 

47 

def op(
    name,
    audio,
    sample_rate,
    labels=None,
    max_outputs=3,
    encoding=None,
    display_name=None,
    description=None,
    collections=None,
):
    """Build a legacy audio summary op inside a TensorFlow graph.

    Arguments:
      name: A unique name for the generated summary node. It is also
        used as the name scope for the ops created here.
      audio: A `Tensor` of audio data shaped `[k, t, c]`: `k` clips,
        `t` frames per clip, `c` channels. Elements should be
        floating-point values in `[-1.0, 1.0]`. Any dimension may be
        statically unknown (i.e., `None`); the rank is asserted to be 3.
      sample_rate: An `int` or rank-0 `int32` `Tensor` giving the sample
        rate in Hz. Must be positive.
      labels: Deprecated. Do not set. Passing a value emits a warning.
      max_outputs: Optional `int` or rank-0 integer `Tensor`. Only the
        first `max_outputs` clips are emitted at each step; any further
        clips are silently discarded.
      encoding: A constant `str` (not a string tensor) naming the desired
        encoding. The only accepted value is "wav", which is also what
        `None` currently resolves to. See the "API compatibility note"
        below.
      display_name: Optional name for this summary in TensorBoard, as a
        constant `str`. Defaults to `name`.
      description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.
      collections: Optional list of graph collections keys. The new
        summary op is added to these collections. Defaults to
        `[GraphKeys.SUMMARIES]`.

    Returns:
      A TensorFlow summary op.

    Raises:
      ValueError: If `encoding` is neither `None` nor "wav".

    API compatibility note: The default value of the `encoding`
    argument is _not_ guaranteed to remain unchanged across TensorBoard
    versions. In the future, we will by default encode as FLAC instead of
    as WAV. If the specific format is important to you, please provide a
    file format explicitly.
    """
    if labels is not None:
        warnings.warn(_LABELS_WARNING)

    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    if encoding is None:
        encoding = "wav"
    # Guard clause: WAV is the only encoding this op knows how to produce.
    if encoding != "wav":
        raise ValueError("Unknown encoding: %r" % encoding)
    wav_encoding = metadata.Encoding.Value("WAV")
    encode_clip = functools.partial(
        tf.audio.encode_wav, sample_rate=sample_rate
    )

    shown_name = name if display_name is None else display_name

    with tf.name_scope(name):
        # The rank assertion is created inside the name scope and then
        # made a control dependency of every op built below.
        rank_check = tf.assert_rank(audio, 3)
        with tf.control_dependencies([rank_check]):
            clips = audio[:max_outputs]
            encoded_clips = tf.map_fn(
                encode_clip,
                clips,
                dtype=tf.string,
                name="encode_each_audio",
            )
            # Without labels, pair each emitted clip with an empty string.
            clip_labels = (
                tf.tile([""], tf.shape(input=clips)[:1])
                if labels is None
                else labels[:max_outputs]
            )
            # Stack to shape [2, k], then transpose so each row is
            # [encoded_audio, label].
            stacked = tf.stack([encoded_clips, clip_labels])
            rows = tf.transpose(a=stacked)
            summary_metadata = metadata.create_summary_metadata(
                display_name=shown_name,
                description=description,
                encoding=wav_encoding,
            )
            return tf.summary.tensor_summary(
                name="audio_summary",
                tensor=rows,
                collections=collections,
                summary_metadata=summary_metadata,
            )

137 

138 

def pb(
    name,
    audio,
    sample_rate,
    labels=None,
    max_outputs=3,
    encoding=None,
    display_name=None,
    description=None,
):
    """Build a legacy audio summary protobuf directly, without a graph.

    The result matches what you would get by creating an `op` with the
    same arguments (wrapped with constant tensors where appropriate) and
    then executing that summary op in a TensorFlow session.

    Arguments:
      name: A unique name for the generated summary node.
      audio: An `np.array` (or anything `np.array` accepts) of audio data
        shaped `[k, t, c]`: `k` clips, `t` frames per clip, `c` channels.
        Elements should be floating-point values in `[-1.0, 1.0]`.
      sample_rate: An `int` giving the sample rate in Hz. Must be
        positive.
      labels: Deprecated. Do not set. Passing a value emits a warning.
      max_outputs: Optional `int`. Only the first `max_outputs` clips are
        emitted; any further clips are silently discarded.
      encoding: A constant `str` naming the desired encoding. The only
        accepted value is "wav", which is also what `None` currently
        resolves to. See the "API compatibility note" below.
      display_name: Optional name for this summary in TensorBoard, as a
        `str`. Defaults to `name`.
      description: Optional long-form description for this summary, as a
        `str`. Markdown is supported. Defaults to empty.

    Returns:
      A `tf.Summary` protobuf object.

    Raises:
      ValueError: If `audio` does not have rank 3, or if `encoding` is
        neither `None` nor "wav".

    API compatibility note: The default value of the `encoding`
    argument is _not_ guaranteed to remain unchanged across TensorBoard
    versions. In the future, we will by default encode as FLAC instead of
    as WAV. If the specific format is important to you, please provide a
    file format explicitly.
    """
    if labels is not None:
        warnings.warn(_LABELS_WARNING)

    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    audio = np.array(audio)
    if audio.ndim != 3:
        raise ValueError("Shape %r must have rank 3" % (audio.shape,))

    if encoding is None:
        encoding = "wav"
    # Guard clause: WAV is the only encoding this function can produce.
    if encoding != "wav":
        raise ValueError("Unknown encoding: %r" % encoding)
    wav_encoding = metadata.Encoding.Value("WAV")
    encode_clip = functools.partial(
        encoder_util.encode_wav, samples_per_second=sample_rate
    )

    clips = audio[:max_outputs]
    if labels is None:
        # Without labels, pair each emitted clip with an empty byte string.
        clip_labels = [b""] * len(clips)
    else:
        clip_labels = list(map(tf.compat.as_bytes, labels[:max_outputs]))
    encoded_clips = list(map(encode_clip, clips))

    # Two rows (encoded audio, labels) transposed so that each row of the
    # resulting array is one [encoded_audio, label] pair.
    rows = np.array([encoded_clips, clip_labels]).T
    tensor_proto = tf.make_tensor_proto(rows, dtype=tf.string)

    plugin_metadata = metadata.create_summary_metadata(
        display_name=name if display_name is None else display_name,
        description=description,
        encoding=wav_encoding,
    )
    # Round-trip through the wire format to obtain TensorFlow's own
    # `SummaryMetadata` message from the one built by `metadata`.
    serialized_metadata = plugin_metadata.SerializeToString()
    tf_summary_metadata = tf.SummaryMetadata.FromString(serialized_metadata)

    result = tf.Summary()
    result.value.add(
        tag="%s/audio_summary" % name,
        metadata=tf_summary_metadata,
        tensor=tensor_proto,
    )
    return result