Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/lite/experimental/microfrontend/python/ops/audio_microfrontend_op.py: 67%

18 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""AudioMicrofrontend Op creates filterbanks from audio data.""" 

16 

17from tensorflow.lite.experimental.microfrontend.ops import gen_audio_microfrontend_op 

18from tensorflow.python.framework import dtypes 

19from tensorflow.python.framework import load_library 

20from tensorflow.python.framework import ops 

21from tensorflow.python.ops import array_ops 

22from tensorflow.python.platform import resource_loader 

23from tensorflow.python.util.tf_export import tf_export 

24 

# Resolve the on-disk location of the compiled op library first, then load it
# at import time. The handle returned by the loader is kept in a private
# module-level name.
_op_library_path = resource_loader.get_path_to_datafile(
    "_audio_microfrontend_op.so")
_audio_microfrontend_op = load_library.load_op_library(_op_library_path)

27 

28 

@tf_export("lite.experimental.microfrontend.python.ops.audio_microfrontend")
def audio_microfrontend(audio,
                        sample_rate=16000,
                        window_size=25,
                        window_step=10,
                        num_channels=32,
                        upper_band_limit=7500.0,
                        lower_band_limit=125.0,
                        smoothing_bits=10,
                        even_smoothing=0.025,
                        odd_smoothing=0.06,
                        min_signal_remaining=0.05,
                        enable_pcan=True,
                        pcan_strength=0.95,
                        pcan_offset=80.0,
                        gain_bits=21,
                        enable_log=True,
                        scale_shift=6,
                        left_context=0,
                        right_context=0,
                        frame_stride=1,
                        zero_padding=False,
                        out_scale=1,
                        out_type=dtypes.uint16):
  """Audio Microfrontend Op.

  This Op converts a sequence of audio data into one or more
  feature vectors containing filterbanks of the input. The
  conversion process uses a lightweight library to perform:

  1. A slicing window function
  2. Short-time FFTs
  3. Filterbank calculations
  4. Noise reduction
  5. PCAN Auto Gain Control
  6. Logarithmic scaling

  Args:
    audio: Tensor, int16 audio data in temporal ordering. Its rank must be
      statically known. A tensor with more than one dimension is flattened to
      1D before it is handed to the op.
    sample_rate: Integer, the sample rate of the audio in Hz.
    window_size: Integer, length of desired time frames in ms.
    window_step: Integer, length of step size for the next frame in ms.
    num_channels: Integer, the number of filterbank channels to use.
    upper_band_limit: Float, the highest frequency included in the filterbanks.
    lower_band_limit: Float, the lowest frequency included in the filterbanks.
    smoothing_bits: Int, scale up signal by 2^(smoothing_bits) before reduction.
    even_smoothing: Float, smoothing coefficient for even-numbered channels.
    odd_smoothing: Float, smoothing coefficient for odd-numbered channels.
    min_signal_remaining: Float, fraction of signal to preserve in smoothing.
    enable_pcan: Bool, enable PCAN auto gain control.
    pcan_strength: Float, gain normalization exponent.
    pcan_offset: Float, positive value added in the normalization denominator.
    gain_bits: Int, number of fractional bits in the gain.
    enable_log: Bool, enable logarithmic scaling of filterbanks.
    scale_shift: Integer, scale filterbanks by 2^(scale_shift).
    left_context: Integer, number of preceding frames to attach to each frame.
    right_context: Integer, number of following frames to attach to each frame.
    frame_stride: Integer, M frames to skip over, where output[n] = frame[n*M].
    zero_padding: Bool, if left/right context is out-of-bounds, attach frame of
      zeroes. Otherwise, frame[0] or frame[size-1] will be copied.
    out_scale: Integer, divide all filterbanks by this number.
    out_type: DType, type of the output Tensor, defaults to UINT16.

  Returns:
    filterbanks: 2D Tensor, each row is a time frame, each column is a channel.

  Raises:
    ValueError: If the rank of `audio` is not statically known.
  """
  audio_shape = audio.shape
  if audio_shape.ndims is None:
    raise ValueError("Input to `AudioMicrofrontend` should have known rank.")
  if len(audio_shape) > 1:
    # Higher-rank inputs are not rejected; they are collapsed into a single
    # flat vector of samples.
    audio = array_ops.reshape(audio, [-1])

  # Every attribute is forwarded positionally, so the order below has to stay
  # in step with the parameter order of the generated op wrapper.
  return gen_audio_microfrontend_op.audio_microfrontend(
      audio, sample_rate, window_size, window_step, num_channels,
      upper_band_limit, lower_band_limit, smoothing_bits, even_smoothing,
      odd_smoothing, min_signal_remaining, enable_pcan, pcan_strength,
      pcan_offset, gain_bits, enable_log, scale_shift, left_context,
      right_context, frame_stride, zero_padding, out_scale, out_type)

110 

111 

# Declare the "AudioMicrofrontend" op type as having no gradient, so no
# gradient function is registered for it.
ops.NotDifferentiable(op_type="AudioMicrofrontend")