Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/signal/mfcc

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14# ==============================================================================

15"""Mel-Frequency Cepstral Coefficients (MFCCs) ops."""

17from tensorflow.python.framework import ops

18from tensorflow.python.ops import array_ops

19from tensorflow.python.ops import math_ops

20from tensorflow.python.ops.signal import dct_ops

21from tensorflow.python.util import dispatch

22from tensorflow.python.util.tf_export import tf_export

@tf_export('signal.mfccs_from_log_mel_spectrograms')
@dispatch.add_dispatch_support
def mfccs_from_log_mel_spectrograms(log_mel_spectrograms, name=None):
  """Computes [MFCCs][mfcc] of `log_mel_spectrograms`.

  Implemented with GPU-compatible ops and supports gradients.

  [Mel-Frequency Cepstral Coefficient (MFCC)][mfcc] calculation consists of
  taking the DCT-II of a log-magnitude mel-scale spectrogram. [HTK][htk]'s MFCCs
  use a particular scaling of the DCT-II which is almost orthogonal
  normalization. We follow this convention.

  All `num_mel_bins` MFCCs are returned and it is up to the caller to select
  a subset of the MFCCs based on their application. For example, it is typical
  to only use the first few for speech recognition, as this results in
  an approximately pitch-invariant representation of the signal.

  For example:

  ```python
  batch_size, num_samples, sample_rate = 32, 32000, 16000.0
  # A Tensor of [batch_size, num_samples] mono PCM samples in the range [-1, 1].
  pcm = tf.random.normal([batch_size, num_samples], dtype=tf.float32)

  # A 1024-point STFT with frames of 64 ms and 75% overlap.
  stfts = tf.signal.stft(pcm, frame_length=1024, frame_step=256,
                         fft_length=1024)
  spectrograms = tf.abs(stfts)

  # Warp the linear scale spectrograms into the mel-scale.
  # Indexing a `TensorShape` yields a plain Python int under TF2 shape
  # semantics, so no `.value` access is needed (or available).
  num_spectrogram_bins = stfts.shape[-1]
  lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
  linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
    num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
    upper_edge_hertz)
  mel_spectrograms = tf.tensordot(
    spectrograms, linear_to_mel_weight_matrix, 1)
  mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
    linear_to_mel_weight_matrix.shape[-1:]))

  # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
  log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

  # Compute MFCCs from log_mel_spectrograms and take the first 13.
  mfccs = tf.signal.mfccs_from_log_mel_spectrograms(
    log_mel_spectrograms)[..., :13]
  ```

  Args:
    log_mel_spectrograms: A `[..., num_mel_bins]` `float32`/`float64` `Tensor`
      of log-magnitude mel-scale spectrograms.
    name: An optional name for the operation.

  Returns:
    A `[..., num_mel_bins]` `float32`/`float64` `Tensor` of the MFCCs of
    `log_mel_spectrograms`.

  Raises:
    ValueError: If `num_mel_bins` is statically known to be zero.

  [mfcc]: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
  [htk]: https://en.wikipedia.org/wiki/HTK_(software)
  """
  with ops.name_scope(name, 'mfccs_from_log_mel_spectrograms',
                      [log_mel_spectrograms]):
    log_mel_spectrograms = ops.convert_to_tensor(log_mel_spectrograms)

    # Prefer the statically known size of the innermost axis so that an empty
    # mel axis can be rejected eagerly at construction time. `shape.dims` is
    # only consulted when the rank is known and non-zero.
    static_shape = log_mel_spectrograms.shape
    static_num_mel_bins = None
    if static_shape.ndims:
      static_num_mel_bins = static_shape.dims[-1].value

    if static_num_mel_bins is None:
      # Size not known statically: read it from the runtime shape instead.
      num_mel_bins = array_ops.shape(log_mel_spectrograms)[-1]
    else:
      if static_num_mel_bins == 0:
        raise ValueError('num_mel_bins must be positive. Got: %s' %
                         log_mel_spectrograms)
      num_mel_bins = static_num_mel_bins

    # Compute the DCT-II of the log-magnitude mel-scale spectrogram.
    # The DCT used in HTK scales every basis vector by sqrt(2/N), which is the
    # scaling required for an "orthogonal" DCT-II *except* in the 0th bin, where
    # the true orthogonal DCT (as implemented by scipy) scales by sqrt(1/N). For
    # this reason, orthogonal normalization is not requested from `dct`; the
    # result is instead scaled manually by `0.5 * sqrt(2/N)`, written below in
    # the equivalent form `rsqrt(2 * N)`.
    dct2 = dct_ops.dct(log_mel_spectrograms, type=2)
    htk_scale = math_ops.rsqrt(
        math_ops.cast(num_mel_bins, dct2.dtype) * 2.0)
    return dct2 * htk_scale

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/signal/mfcc_ops.py: 53%

19 statements