1"""Python wrappers around TensorFlow ops.
3This file is MACHINE GENERATED! Do not edit.
4"""
6import collections
8from tensorflow.python import pywrap_tfe as pywrap_tfe
9from tensorflow.python.eager import context as _context
10from tensorflow.python.eager import core as _core
11from tensorflow.python.eager import execute as _execute
12from tensorflow.python.framework import dtypes as _dtypes
13from tensorflow.security.fuzzing.py import annotation_types as _atypes
15from tensorflow.python.framework import op_def_registry as _op_def_registry
16from tensorflow.python.framework import ops as _ops
17from tensorflow.python.framework import op_def_library as _op_def_library
18from tensorflow.python.util.deprecation import deprecated_endpoints
19from tensorflow.python.util import dispatch as _dispatch
20from tensorflow.python.util.tf_export import tf_export
22from typing import TypeVar
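
# Editor's note: every op wrapper below follows the same generated pattern.
# In eager mode it first attempts the fast C++ execution path
# (pywrap_tfe.TFE_Py_FastPathExecute); if that raises _FallbackException it
# retries through a Python-level eager call (`*_eager_fallback`); otherwise,
# in graph mode, it builds a node via _op_def_library._apply_op_helper.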

def audio_spectrogram(input, window_size, stride, magnitude_squared=False, name=None):
  r"""Produces a visualization of audio data over time.

  Spectrograms are a standard way of representing audio information as a series of
  slices of frequency information, one slice for each window of time. By joining
  these together into a sequence, they form a distinctive fingerprint of the sound
  over time.

  This op expects to receive audio data as an input, stored as floats in the range
  -1 to 1, together with a window width in samples, and a stride specifying how
  far to move the window between slices. From this it generates a three
  dimensional output. The first dimension is for the channels in the input, so a
  stereo audio input would have two here for example. The second dimension is time,
  with successive frequency slices. The third dimension has an amplitude value for
  each frequency during that time slice.

  This means the layout when converted and saved as an image is rotated 90 degrees
  clockwise from a typical spectrogram. Time is descending down the Y axis, and
  the frequency decreases from left to right.

  Each value in the result represents the square root of the sum of the real and
  imaginary parts of an FFT on the current window of samples. In this way, the
  lowest dimension represents the power of each frequency in the current window,
  and adjacent windows are concatenated in the next dimension.

  To get a more intuitive and visual look at what this operation does, you can run
  tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
  resulting spectrogram as a PNG image.

  Args:
    input: A `Tensor` of type `float32`. Float representation of audio data.
    window_size: An `int`.
      How wide the input window is in samples. For the highest efficiency
      this should be a power of two, but other values are accepted.
    stride: An `int`.
      How widely apart the center of adjacent sample windows should be.
    magnitude_squared: An optional `bool`. Defaults to `False`.
      Whether to return the squared magnitude or just the
      magnitude. Using squared magnitude can avoid extra calculations.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "AudioSpectrogram", name, input, "window_size", window_size,
        "stride", stride, "magnitude_squared", magnitude_squared)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      return audio_spectrogram_eager_fallback(
          input, window_size=window_size, stride=stride,
          magnitude_squared=magnitude_squared, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  window_size = _execute.make_int(window_size, "window_size")
  stride = _execute.make_int(stride, "stride")
  if magnitude_squared is None:
    magnitude_squared = False
  magnitude_squared = _execute.make_bool(magnitude_squared, "magnitude_squared")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "AudioSpectrogram", input=input, window_size=window_size,
                            stride=stride,
                            magnitude_squared=magnitude_squared, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("window_size", _op._get_attr_int("window_size"), "stride",
              _op._get_attr_int("stride"), "magnitude_squared",
              _op._get_attr_bool("magnitude_squared"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "AudioSpectrogram", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

AudioSpectrogram = tf_export("raw_ops.AudioSpectrogram")(_ops.to_raw_op(audio_spectrogram))
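
# A minimal usage sketch (editor's addition, not part of the generated op
# definitions). It assumes a float32 waveform of shape [samples, channels]
# with values in [-1, 1]; per the docstring above, the result has shape
# [channels, frames, fft_bins]. Defined but never called, so it is inert.
def _example_audio_spectrogram():
  import tensorflow as tf  # imported lazily so the sketch has no import-time cost
  waveform = tf.random.uniform([16000, 1], minval=-1.0, maxval=1.0)
  return tf.raw_ops.AudioSpectrogram(
      input=waveform, window_size=512, stride=256, magnitude_squared=True)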

def audio_spectrogram_eager_fallback(input, window_size, stride, magnitude_squared, name, ctx):
  window_size = _execute.make_int(window_size, "window_size")
  stride = _execute.make_int(stride, "stride")
  if magnitude_squared is None:
    magnitude_squared = False
  magnitude_squared = _execute.make_bool(magnitude_squared, "magnitude_squared")
  input = _ops.convert_to_tensor(input, _dtypes.float32)
  _inputs_flat = [input]
  _attrs = ("window_size", window_size, "stride", stride, "magnitude_squared",
  magnitude_squared)
  _result = _execute.execute(b"AudioSpectrogram", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "AudioSpectrogram", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

_DecodeWavOutput = collections.namedtuple(
    "DecodeWav",
    ["audio", "sample_rate"])


@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('audio.decode_wav')
def decode_wav(contents, desired_channels=-1, desired_samples=-1, name=None):
  r"""Decode a 16-bit PCM WAV file to a float tensor.

  The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float.

  When desired_channels is set, if the input contains fewer channels than this
  then the last channel will be duplicated to give the requested number, else if
  the input has more channels than requested then the additional channels will be
  ignored.

  If desired_samples is set, then the audio will be cropped or padded with zeroes
  to the requested length.

  The first output contains a Tensor with the content of the audio samples. The
  first dimension is the number of samples and the second is the number of
  channels, so a ten-sample-long stereo WAV file gives an output shape of
  [10, 2].

  Args:
    contents: A `Tensor` of type `string`.
      The WAV-encoded audio, usually from a file.
    desired_channels: An optional `int`. Defaults to `-1`.
      Number of sample channels wanted.
    desired_samples: An optional `int`. Defaults to `-1`.
      Length of audio requested.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (audio, sample_rate).

    audio: A `Tensor` of type `float32`.
    sample_rate: A `Tensor` of type `int32`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "DecodeWav", name, contents, "desired_channels",
        desired_channels, "desired_samples", desired_samples)
      _result = _DecodeWavOutput._make(_result)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      _result = _dispatcher_for_decode_wav(
          (contents, desired_channels, desired_samples, name,), None)
      if _result is not NotImplemented:
        return _result
      return decode_wav_eager_fallback(
          contents, desired_channels=desired_channels,
          desired_samples=desired_samples, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      _result = _dispatch.dispatch(
            decode_wav, (), dict(contents=contents,
                                 desired_channels=desired_channels,
                                 desired_samples=desired_samples, name=name)
          )
      if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return _result
      raise
  else:
    _result = _dispatcher_for_decode_wav(
        (contents, desired_channels, desired_samples, name,), None)
    if _result is not NotImplemented:
      return _result
  # Add nodes to the TensorFlow graph.
  if desired_channels is None:
    desired_channels = -1
  desired_channels = _execute.make_int(desired_channels, "desired_channels")
  if desired_samples is None:
    desired_samples = -1
  desired_samples = _execute.make_int(desired_samples, "desired_samples")
  try:
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "DecodeWav", contents=contents, desired_channels=desired_channels,
                     desired_samples=desired_samples, name=name)
  except (TypeError, ValueError):
    _result = _dispatch.dispatch(
          decode_wav, (), dict(contents=contents,
                               desired_channels=desired_channels,
                               desired_samples=desired_samples, name=name)
        )
    if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _result
    raise
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("desired_channels", _op._get_attr_int("desired_channels"),
              "desired_samples", _op._get_attr_int("desired_samples"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "DecodeWav", _inputs_flat, _attrs, _result)
  _result = _DecodeWavOutput._make(_result)
  return _result

DecodeWav = tf_export("raw_ops.DecodeWav")(_ops.to_raw_op(decode_wav))
_dispatcher_for_decode_wav = decode_wav._tf_type_based_dispatcher.Dispatch
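
# A minimal usage sketch (editor's addition) for the public endpoint
# `tf.audio.decode_wav`. The path below is a placeholder; `audio` comes back
# as float32 in [-1.0, 1.0] with shape [samples, channels], and `sample_rate`
# as a scalar int32 tensor.
def _example_decode_wav(wav_path="/tmp/example.wav"):
  import tensorflow as tf
  contents = tf.io.read_file(wav_path)
  audio, sample_rate = tf.audio.decode_wav(contents, desired_channels=1)
  return audio, sample_rate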

def decode_wav_eager_fallback(contents, desired_channels, desired_samples, name, ctx):
  if desired_channels is None:
    desired_channels = -1
  desired_channels = _execute.make_int(desired_channels, "desired_channels")
  if desired_samples is None:
    desired_samples = -1
  desired_samples = _execute.make_int(desired_samples, "desired_samples")
  contents = _ops.convert_to_tensor(contents, _dtypes.string)
  _inputs_flat = [contents]
  _attrs = ("desired_channels", desired_channels, "desired_samples",
  desired_samples)
  _result = _execute.execute(b"DecodeWav", 2, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "DecodeWav", _inputs_flat, _attrs, _result)
  _result = _DecodeWavOutput._make(_result)
  return _result

@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('audio.encode_wav')
def encode_wav(audio, sample_rate, name=None):
  r"""Encode audio data using the WAV file format.

  This operation will generate a string suitable to be saved out to create a .wav
  audio file. It will be encoded in the 16-bit PCM format. It takes in float
  values in the range -1.0f to 1.0f, and any values outside that range will be
  clamped to it.

  `audio` is a 2-D float Tensor of shape `[length, channels]`.
  `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).

  Args:
    audio: A `Tensor` of type `float32`. 2-D with shape `[length, channels]`.
    sample_rate: A `Tensor` of type `int32`.
      Scalar containing the sample frequency.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "EncodeWav", name, audio, sample_rate)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      _result = _dispatcher_for_encode_wav(
          (audio, sample_rate, name,), None)
      if _result is not NotImplemented:
        return _result
      return encode_wav_eager_fallback(
          audio, sample_rate, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      _result = _dispatch.dispatch(
            encode_wav, (), dict(audio=audio, sample_rate=sample_rate,
                                 name=name)
          )
      if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return _result
      raise
  else:
    _result = _dispatcher_for_encode_wav(
        (audio, sample_rate, name,), None)
    if _result is not NotImplemented:
      return _result
  # Add nodes to the TensorFlow graph.
  try:
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "EncodeWav", audio=audio, sample_rate=sample_rate, name=name)
  except (TypeError, ValueError):
    _result = _dispatch.dispatch(
          encode_wav, (), dict(audio=audio, sample_rate=sample_rate,
                               name=name)
        )
    if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _result
    raise
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ()
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "EncodeWav", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

EncodeWav = tf_export("raw_ops.EncodeWav")(_ops.to_raw_op(encode_wav))
_dispatcher_for_encode_wav = encode_wav._tf_type_based_dispatcher.Dispatch
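
# A minimal usage sketch (editor's addition): writing a 440 Hz tone through
# `tf.audio.encode_wav`. Values outside [-1.0, 1.0] would be clamped before
# the 16-bit PCM encoding, per the docstring above. The path is a placeholder.
def _example_encode_wav(wav_path="/tmp/tone.wav"):
  import tensorflow as tf
  t = tf.linspace(0.0, 1.0, 16000)
  tone = tf.sin(2.0 * 3.14159265 * 440.0 * t)[:, tf.newaxis]  # [length, 1]
  wav_bytes = tf.audio.encode_wav(tone, sample_rate=16000)
  tf.io.write_file(wav_path, wav_bytes)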

def encode_wav_eager_fallback(audio, sample_rate, name, ctx):
  audio = _ops.convert_to_tensor(audio, _dtypes.float32)
  sample_rate = _ops.convert_to_tensor(sample_rate, _dtypes.int32)
  _inputs_flat = [audio, sample_rate]
  _attrs = None
  _result = _execute.execute(b"EncodeWav", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "EncodeWav", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

def mfcc(spectrogram, sample_rate, upper_frequency_limit=4000, lower_frequency_limit=20, filterbank_channel_count=40, dct_coefficient_count=13, name=None):
  r"""Transforms a spectrogram into a form that's useful for speech recognition.

  Mel Frequency Cepstral Coefficients are a way of representing audio data that's
  been effective as an input feature for machine learning. They are created by
  taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
  higher frequencies that are less significant to the human ear. They have a long
  history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
  is a good resource to learn more.

  Args:
    spectrogram: A `Tensor` of type `float32`.
      Typically produced by the Spectrogram op, with magnitude_squared
      set to true.
    sample_rate: A `Tensor` of type `int32`.
      The number of samples per second of the source audio.
    upper_frequency_limit: An optional `float`. Defaults to `4000`.
      The highest frequency to use when calculating the
      cepstrum.
    lower_frequency_limit: An optional `float`. Defaults to `20`.
      The lowest frequency to use when calculating the
      cepstrum.
    filterbank_channel_count: An optional `int`. Defaults to `40`.
      Resolution of the Mel bank used internally.
    dct_coefficient_count: An optional `int`. Defaults to `13`.
      How many output channels to produce per time slice.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "Mfcc", name, spectrogram, sample_rate, "upper_frequency_limit",
        upper_frequency_limit, "lower_frequency_limit", lower_frequency_limit,
        "filterbank_channel_count", filterbank_channel_count,
        "dct_coefficient_count", dct_coefficient_count)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      return mfcc_eager_fallback(
          spectrogram, sample_rate,
          upper_frequency_limit=upper_frequency_limit,
          lower_frequency_limit=lower_frequency_limit,
          filterbank_channel_count=filterbank_channel_count,
          dct_coefficient_count=dct_coefficient_count, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  if upper_frequency_limit is None:
    upper_frequency_limit = 4000
  upper_frequency_limit = _execute.make_float(upper_frequency_limit, "upper_frequency_limit")
  if lower_frequency_limit is None:
    lower_frequency_limit = 20
  lower_frequency_limit = _execute.make_float(lower_frequency_limit, "lower_frequency_limit")
  if filterbank_channel_count is None:
    filterbank_channel_count = 40
  filterbank_channel_count = _execute.make_int(filterbank_channel_count, "filterbank_channel_count")
  if dct_coefficient_count is None:
    dct_coefficient_count = 13
  dct_coefficient_count = _execute.make_int(dct_coefficient_count, "dct_coefficient_count")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "Mfcc", spectrogram=spectrogram, sample_rate=sample_rate,
                upper_frequency_limit=upper_frequency_limit,
                lower_frequency_limit=lower_frequency_limit,
                filterbank_channel_count=filterbank_channel_count,
                dct_coefficient_count=dct_coefficient_count, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("upper_frequency_limit", _op.get_attr("upper_frequency_limit"),
              "lower_frequency_limit", _op.get_attr("lower_frequency_limit"),
              "filterbank_channel_count",
              _op._get_attr_int("filterbank_channel_count"),
              "dct_coefficient_count",
              _op._get_attr_int("dct_coefficient_count"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "Mfcc", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

Mfcc = tf_export("raw_ops.Mfcc")(_ops.to_raw_op(mfcc))
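
# A minimal end-to-end sketch (editor's addition) chaining the ops in this
# file: WAV bytes -> spectrogram -> MFCC features. `Mfcc` expects the
# squared-magnitude spectrogram, hence magnitude_squared=True; the path is a
# placeholder and `sample_rate` arrives as a scalar int32 tensor from
# decode_wav.
def _example_mfcc_pipeline(wav_path="/tmp/speech.wav"):
  import tensorflow as tf
  audio, sample_rate = tf.audio.decode_wav(tf.io.read_file(wav_path),
                                           desired_channels=1)
  spectrogram = tf.raw_ops.AudioSpectrogram(
      input=audio, window_size=512, stride=256, magnitude_squared=True)
  return tf.raw_ops.Mfcc(spectrogram=spectrogram, sample_rate=sample_rate,
                         dct_coefficient_count=13)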

def mfcc_eager_fallback(spectrogram, sample_rate, upper_frequency_limit, lower_frequency_limit, filterbank_channel_count, dct_coefficient_count, name, ctx):
  if upper_frequency_limit is None:
    upper_frequency_limit = 4000
  upper_frequency_limit = _execute.make_float(upper_frequency_limit, "upper_frequency_limit")
  if lower_frequency_limit is None:
    lower_frequency_limit = 20
  lower_frequency_limit = _execute.make_float(lower_frequency_limit, "lower_frequency_limit")
  if filterbank_channel_count is None:
    filterbank_channel_count = 40
  filterbank_channel_count = _execute.make_int(filterbank_channel_count, "filterbank_channel_count")
  if dct_coefficient_count is None:
    dct_coefficient_count = 13
  dct_coefficient_count = _execute.make_int(dct_coefficient_count, "dct_coefficient_count")
  spectrogram = _ops.convert_to_tensor(spectrogram, _dtypes.float32)
  sample_rate = _ops.convert_to_tensor(sample_rate, _dtypes.int32)
  _inputs_flat = [spectrogram, sample_rate]
  _attrs = ("upper_frequency_limit", upper_frequency_limit,
  "lower_frequency_limit", lower_frequency_limit, "filterbank_channel_count",
  filterbank_channel_count, "dct_coefficient_count", dct_coefficient_count)
  _result = _execute.execute(b"Mfcc", 1, inputs=_inputs_flat, attrs=_attrs,
                             ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "Mfcc", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result