"""Python wrappers around TensorFlow ops.

This file is MACHINE GENERATED! Do not edit.
"""

import collections

from tensorflow.python import pywrap_tfe as pywrap_tfe
from tensorflow.python.eager import context as _context
from tensorflow.python.eager import core as _core
from tensorflow.python.eager import execute as _execute
from tensorflow.python.framework import dtypes as _dtypes
from tensorflow.security.fuzzing.py import annotation_types as _atypes

from tensorflow.python.framework import op_def_registry as _op_def_registry
from tensorflow.python.framework import ops as _ops
from tensorflow.python.framework import op_def_library as _op_def_library
from tensorflow.python.util.deprecation import deprecated_endpoints
from tensorflow.python.util import dispatch as _dispatch
from tensorflow.python.util.tf_export import tf_export

from typing import TypeVar

@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('audio_microfrontend')
def audio_microfrontend(audio, sample_rate=16000, window_size=25, window_step=10, num_channels=32, upper_band_limit=7500, lower_band_limit=125, smoothing_bits=10, even_smoothing=0.025, odd_smoothing=0.06, min_signal_remaining=0.05, enable_pcan=False, pcan_strength=0.95, pcan_offset=80, gain_bits=21, enable_log=True, scale_shift=6, left_context=0, right_context=0, frame_stride=1, zero_padding=False, out_scale=1, out_type=_dtypes.uint16, name=None):
  r"""Audio Microfrontend Op.

  This Op converts a sequence of audio data into one or more
  feature vectors containing filterbanks of the input. The
  conversion process uses a lightweight library to perform:

  1. A sliding window function
  2. Short-time FFTs
  3. Filterbank calculations
  4. Noise reduction
  5. PCAN Auto Gain Control
  6. Logarithmic scaling

  Arguments
    audio: 1D Tensor, int16 audio data in temporal ordering.
    sample_rate: Integer, the sample rate of the audio in Hz.
    window_size: Integer, length of desired time frames in ms.
    window_step: Integer, length of step size for the next frame in ms.
    num_channels: Integer, the number of filterbank channels to use.
    upper_band_limit: Float, the highest frequency included in the filterbanks.
    lower_band_limit: Float, the lowest frequency included in the filterbanks.
    smoothing_bits: Int, scale up signal by 2^(smoothing_bits) before reduction.
    even_smoothing: Float, smoothing coefficient for even-numbered channels.
    odd_smoothing: Float, smoothing coefficient for odd-numbered channels.
    min_signal_remaining: Float, fraction of signal to preserve in smoothing.
    enable_pcan: Bool, enable PCAN auto gain control.
    pcan_strength: Float, gain normalization exponent.
    pcan_offset: Float, positive value added in the normalization denominator.
    gain_bits: Int, number of fractional bits in the gain.
    enable_log: Bool, enable logarithmic scaling of filterbanks.
    scale_shift: Integer, scale filterbanks by 2^(scale_shift).
    left_context: Integer, number of preceding frames to attach to each frame.
    right_context: Integer, number of following frames to attach to each frame.
    frame_stride: Integer, M frames to skip over, where output[n] = frame[n*M].
    zero_padding: Bool, if left/right context is out-of-bounds, attach frame of
      zeroes. Otherwise, frame[0] or frame[size-1] will be copied.
    out_scale: Integer, divide all filterbanks by this number.
    out_type: DType, type of the output Tensor, defaults to UINT16.

  Returns
    filterbanks: 2D Tensor, each row is a time frame, each column is a channel.
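
  For example (an illustrative sketch, not part of the op definition): with
  the defaults above, frame_stride=1, and no left/right context, 1 second of
  16 kHz audio framed with 25 ms windows advancing every 10 ms yields roughly
  98 time frames, each holding num_channels filterbank values.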

  Args:
    audio: A `Tensor` of type `int16`.
    sample_rate: An optional `int`. Defaults to `16000`.
    window_size: An optional `int`. Defaults to `25`.
    window_step: An optional `int`. Defaults to `10`.
    num_channels: An optional `int`. Defaults to `32`.
    upper_band_limit: An optional `float`. Defaults to `7500`.
    lower_band_limit: An optional `float`. Defaults to `125`.
    smoothing_bits: An optional `int`. Defaults to `10`.
    even_smoothing: An optional `float`. Defaults to `0.025`.
    odd_smoothing: An optional `float`. Defaults to `0.06`.
    min_signal_remaining: An optional `float`. Defaults to `0.05`.
    enable_pcan: An optional `bool`. Defaults to `False`.
    pcan_strength: An optional `float`. Defaults to `0.95`.
    pcan_offset: An optional `float`. Defaults to `80`.
    gain_bits: An optional `int`. Defaults to `21`.
    enable_log: An optional `bool`. Defaults to `True`.
    scale_shift: An optional `int`. Defaults to `6`.
    left_context: An optional `int`. Defaults to `0`.
    right_context: An optional `int`. Defaults to `0`.
    frame_stride: An optional `int`. Defaults to `1`.
    zero_padding: An optional `bool`. Defaults to `False`.
    out_scale: An optional `int`. Defaults to `1`.
    out_type: An optional `tf.DType` from: `tf.uint16, tf.float32`. Defaults to `tf.uint16`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `out_type`.
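
  Example (a minimal sketch, not part of the generated reference; it assumes
  the AudioMicrofrontend kernel is registered in the running TensorFlow binary
  and uses `samples` as a placeholder name for int16 PCM audio):

    import tensorflow as tf

    samples = tf.zeros([16000], dtype=tf.int16)  # 1 s of silence at 16 kHz
    filterbanks = audio_microfrontend(samples)   # defaults: 25 ms window, 10 ms step
    # `filterbanks` should be a 2D uint16 Tensor with one row per 10 ms frame
    # and num_channels (32) columns.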
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "AudioMicrofrontend", name, audio, "sample_rate", sample_rate,
        "window_size", window_size, "window_step", window_step,
        "num_channels", num_channels, "upper_band_limit", upper_band_limit,
        "lower_band_limit", lower_band_limit, "smoothing_bits",
        smoothing_bits, "even_smoothing", even_smoothing, "odd_smoothing",
        odd_smoothing, "min_signal_remaining", min_signal_remaining,
        "enable_pcan", enable_pcan, "pcan_strength", pcan_strength,
        "pcan_offset", pcan_offset, "gain_bits", gain_bits, "enable_log",
        enable_log, "scale_shift", scale_shift, "left_context", left_context,
        "right_context", right_context, "frame_stride", frame_stride,
        "zero_padding", zero_padding, "out_scale", out_scale, "out_type",
        out_type)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      _result = _dispatcher_for_audio_microfrontend(
          (audio, sample_rate, window_size, window_step, num_channels,
           upper_band_limit, lower_band_limit, smoothing_bits, even_smoothing,
           odd_smoothing, min_signal_remaining, enable_pcan, pcan_strength,
           pcan_offset, gain_bits, enable_log, scale_shift, left_context,
           right_context, frame_stride, zero_padding, out_scale, out_type,
           name,), None)
      if _result is not NotImplemented:
        return _result
      return audio_microfrontend_eager_fallback(
          audio, sample_rate=sample_rate, window_size=window_size,
          window_step=window_step, num_channels=num_channels,
          upper_band_limit=upper_band_limit,
          lower_band_limit=lower_band_limit, smoothing_bits=smoothing_bits,
          even_smoothing=even_smoothing, odd_smoothing=odd_smoothing,
          min_signal_remaining=min_signal_remaining, enable_pcan=enable_pcan,
          pcan_strength=pcan_strength, pcan_offset=pcan_offset,
          gain_bits=gain_bits, enable_log=enable_log, scale_shift=scale_shift,
          left_context=left_context, right_context=right_context,
          frame_stride=frame_stride, zero_padding=zero_padding,
          out_scale=out_scale, out_type=out_type, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      _result = _dispatch.dispatch(
            audio_microfrontend, (), dict(audio=audio,
                                          sample_rate=sample_rate,
                                          window_size=window_size,
                                          window_step=window_step,
                                          num_channels=num_channels,
                                          upper_band_limit=upper_band_limit,
                                          lower_band_limit=lower_band_limit,
                                          smoothing_bits=smoothing_bits,
                                          even_smoothing=even_smoothing,
                                          odd_smoothing=odd_smoothing,
                                          min_signal_remaining=min_signal_remaining,
                                          enable_pcan=enable_pcan,
                                          pcan_strength=pcan_strength,
                                          pcan_offset=pcan_offset,
                                          gain_bits=gain_bits,
                                          enable_log=enable_log,
                                          scale_shift=scale_shift,
                                          left_context=left_context,
                                          right_context=right_context,
                                          frame_stride=frame_stride,
                                          zero_padding=zero_padding,
                                          out_scale=out_scale,
                                          out_type=out_type, name=name)
          )
      if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return _result
      raise
  else:
    _result = _dispatcher_for_audio_microfrontend(
        (audio, sample_rate, window_size, window_step, num_channels,
         upper_band_limit, lower_band_limit, smoothing_bits, even_smoothing,
         odd_smoothing, min_signal_remaining, enable_pcan, pcan_strength,
         pcan_offset, gain_bits, enable_log, scale_shift, left_context,
         right_context, frame_stride, zero_padding, out_scale, out_type,
         name,), None)
    if _result is not NotImplemented:
      return _result
  # Add nodes to the TensorFlow graph.
  if sample_rate is None:
    sample_rate = 16000
  sample_rate = _execute.make_int(sample_rate, "sample_rate")
  if window_size is None:
    window_size = 25
  window_size = _execute.make_int(window_size, "window_size")
  if window_step is None:
    window_step = 10
  window_step = _execute.make_int(window_step, "window_step")
  if num_channels is None:
    num_channels = 32
  num_channels = _execute.make_int(num_channels, "num_channels")
  if upper_band_limit is None:
    upper_band_limit = 7500
  upper_band_limit = _execute.make_float(upper_band_limit, "upper_band_limit")
  if lower_band_limit is None:
    lower_band_limit = 125
  lower_band_limit = _execute.make_float(lower_band_limit, "lower_band_limit")
  if smoothing_bits is None:
    smoothing_bits = 10
  smoothing_bits = _execute.make_int(smoothing_bits, "smoothing_bits")
  if even_smoothing is None:
    even_smoothing = 0.025
  even_smoothing = _execute.make_float(even_smoothing, "even_smoothing")
  if odd_smoothing is None:
    odd_smoothing = 0.06
  odd_smoothing = _execute.make_float(odd_smoothing, "odd_smoothing")
  if min_signal_remaining is None:
    min_signal_remaining = 0.05
  min_signal_remaining = _execute.make_float(min_signal_remaining, "min_signal_remaining")
  if enable_pcan is None:
    enable_pcan = False
  enable_pcan = _execute.make_bool(enable_pcan, "enable_pcan")
  if pcan_strength is None:
    pcan_strength = 0.95
  pcan_strength = _execute.make_float(pcan_strength, "pcan_strength")
  if pcan_offset is None:
    pcan_offset = 80
  pcan_offset = _execute.make_float(pcan_offset, "pcan_offset")
  if gain_bits is None:
    gain_bits = 21
  gain_bits = _execute.make_int(gain_bits, "gain_bits")
  if enable_log is None:
    enable_log = True
  enable_log = _execute.make_bool(enable_log, "enable_log")
  if scale_shift is None:
    scale_shift = 6
  scale_shift = _execute.make_int(scale_shift, "scale_shift")
  if left_context is None:
    left_context = 0
  left_context = _execute.make_int(left_context, "left_context")
  if right_context is None:
    right_context = 0
  right_context = _execute.make_int(right_context, "right_context")
  if frame_stride is None:
    frame_stride = 1
  frame_stride = _execute.make_int(frame_stride, "frame_stride")
  if zero_padding is None:
    zero_padding = False
  zero_padding = _execute.make_bool(zero_padding, "zero_padding")
  if out_scale is None:
    out_scale = 1
  out_scale = _execute.make_int(out_scale, "out_scale")
  if out_type is None:
    out_type = _dtypes.uint16
  out_type = _execute.make_type(out_type, "out_type")
  try:
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "AudioMicrofrontend", audio=audio, sample_rate=sample_rate,
                              window_size=window_size,
                              window_step=window_step,
                              num_channels=num_channels,
                              upper_band_limit=upper_band_limit,
                              lower_band_limit=lower_band_limit,
                              smoothing_bits=smoothing_bits,
                              even_smoothing=even_smoothing,
                              odd_smoothing=odd_smoothing,
                              min_signal_remaining=min_signal_remaining,
                              enable_pcan=enable_pcan,
                              pcan_strength=pcan_strength,
                              pcan_offset=pcan_offset, gain_bits=gain_bits,
                              enable_log=enable_log, scale_shift=scale_shift,
                              left_context=left_context,
                              right_context=right_context,
                              frame_stride=frame_stride,
                              zero_padding=zero_padding, out_scale=out_scale,
                              out_type=out_type, name=name)
  except (TypeError, ValueError):
    _result = _dispatch.dispatch(
          audio_microfrontend, (), dict(audio=audio, sample_rate=sample_rate,
                                        window_size=window_size,
                                        window_step=window_step,
                                        num_channels=num_channels,
                                        upper_band_limit=upper_band_limit,
                                        lower_band_limit=lower_band_limit,
                                        smoothing_bits=smoothing_bits,
                                        even_smoothing=even_smoothing,
                                        odd_smoothing=odd_smoothing,
                                        min_signal_remaining=min_signal_remaining,
                                        enable_pcan=enable_pcan,
                                        pcan_strength=pcan_strength,
                                        pcan_offset=pcan_offset,
                                        gain_bits=gain_bits,
                                        enable_log=enable_log,
                                        scale_shift=scale_shift,
                                        left_context=left_context,
                                        right_context=right_context,
                                        frame_stride=frame_stride,
                                        zero_padding=zero_padding,
                                        out_scale=out_scale,
                                        out_type=out_type, name=name)
        )
    if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _result
    raise
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("sample_rate", _op._get_attr_int("sample_rate"), "window_size",
              _op._get_attr_int("window_size"), "window_step",
              _op._get_attr_int("window_step"), "num_channels",
              _op._get_attr_int("num_channels"), "upper_band_limit",
              _op.get_attr("upper_band_limit"), "lower_band_limit",
              _op.get_attr("lower_band_limit"), "smoothing_bits",
              _op._get_attr_int("smoothing_bits"), "even_smoothing",
              _op.get_attr("even_smoothing"), "odd_smoothing",
              _op.get_attr("odd_smoothing"), "min_signal_remaining",
              _op.get_attr("min_signal_remaining"), "enable_pcan",
              _op._get_attr_bool("enable_pcan"), "pcan_strength",
              _op.get_attr("pcan_strength"), "pcan_offset",
              _op.get_attr("pcan_offset"), "gain_bits",
              _op._get_attr_int("gain_bits"), "enable_log",
              _op._get_attr_bool("enable_log"), "scale_shift",
              _op._get_attr_int("scale_shift"), "left_context",
              _op._get_attr_int("left_context"), "right_context",
              _op._get_attr_int("right_context"), "frame_stride",
              _op._get_attr_int("frame_stride"), "zero_padding",
              _op._get_attr_bool("zero_padding"), "out_scale",
              _op._get_attr_int("out_scale"), "out_type",
              _op._get_attr_type("out_type"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "AudioMicrofrontend", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

AudioMicrofrontend = tf_export("raw_ops.AudioMicrofrontend")(_ops.to_raw_op(audio_microfrontend))
_dispatcher_for_audio_microfrontend = audio_microfrontend._tf_type_based_dispatcher.Dispatch
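# Note (illustrative, not part of the generated wrapper above): because the
# symbol is exported under "raw_ops", the op should also be reachable as
#   tf.raw_ops.AudioMicrofrontend(audio=samples, sample_rate=16000)
# once this module has been imported; `_ops.to_raw_op` makes every argument
# keyword-only, with defaults mirroring `audio_microfrontend`.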


def audio_microfrontend_eager_fallback(audio, sample_rate, window_size, window_step, num_channels, upper_band_limit, lower_band_limit, smoothing_bits, even_smoothing, odd_smoothing, min_signal_remaining, enable_pcan, pcan_strength, pcan_offset, gain_bits, enable_log, scale_shift, left_context, right_context, frame_stride, zero_padding, out_scale, out_type, name, ctx):
  if sample_rate is None:
    sample_rate = 16000
  sample_rate = _execute.make_int(sample_rate, "sample_rate")
  if window_size is None:
    window_size = 25
  window_size = _execute.make_int(window_size, "window_size")
  if window_step is None:
    window_step = 10
  window_step = _execute.make_int(window_step, "window_step")
  if num_channels is None:
    num_channels = 32
  num_channels = _execute.make_int(num_channels, "num_channels")
  if upper_band_limit is None:
    upper_band_limit = 7500
  upper_band_limit = _execute.make_float(upper_band_limit, "upper_band_limit")
  if lower_band_limit is None:
    lower_band_limit = 125
  lower_band_limit = _execute.make_float(lower_band_limit, "lower_band_limit")
  if smoothing_bits is None:
    smoothing_bits = 10
  smoothing_bits = _execute.make_int(smoothing_bits, "smoothing_bits")
  if even_smoothing is None:
    even_smoothing = 0.025
  even_smoothing = _execute.make_float(even_smoothing, "even_smoothing")
  if odd_smoothing is None:
    odd_smoothing = 0.06
  odd_smoothing = _execute.make_float(odd_smoothing, "odd_smoothing")
  if min_signal_remaining is None:
    min_signal_remaining = 0.05
  min_signal_remaining = _execute.make_float(min_signal_remaining, "min_signal_remaining")
  if enable_pcan is None:
    enable_pcan = False
  enable_pcan = _execute.make_bool(enable_pcan, "enable_pcan")
  if pcan_strength is None:
    pcan_strength = 0.95
  pcan_strength = _execute.make_float(pcan_strength, "pcan_strength")
  if pcan_offset is None:
    pcan_offset = 80
  pcan_offset = _execute.make_float(pcan_offset, "pcan_offset")
  if gain_bits is None:
    gain_bits = 21
  gain_bits = _execute.make_int(gain_bits, "gain_bits")
  if enable_log is None:
    enable_log = True
  enable_log = _execute.make_bool(enable_log, "enable_log")
  if scale_shift is None:
    scale_shift = 6
  scale_shift = _execute.make_int(scale_shift, "scale_shift")
  if left_context is None:
    left_context = 0
  left_context = _execute.make_int(left_context, "left_context")
  if right_context is None:
    right_context = 0
  right_context = _execute.make_int(right_context, "right_context")
  if frame_stride is None:
    frame_stride = 1
  frame_stride = _execute.make_int(frame_stride, "frame_stride")
  if zero_padding is None:
    zero_padding = False
  zero_padding = _execute.make_bool(zero_padding, "zero_padding")
  if out_scale is None:
    out_scale = 1
  out_scale = _execute.make_int(out_scale, "out_scale")
  if out_type is None:
    out_type = _dtypes.uint16
  out_type = _execute.make_type(out_type, "out_type")
  audio = _ops.convert_to_tensor(audio, _dtypes.int16)
  _inputs_flat = [audio]
  _attrs = ("sample_rate", sample_rate, "window_size", window_size,
  "window_step", window_step, "num_channels", num_channels,
  "upper_band_limit", upper_band_limit, "lower_band_limit", lower_band_limit,
  "smoothing_bits", smoothing_bits, "even_smoothing", even_smoothing,
  "odd_smoothing", odd_smoothing, "min_signal_remaining",
  min_signal_remaining, "enable_pcan", enable_pcan, "pcan_strength",
  pcan_strength, "pcan_offset", pcan_offset, "gain_bits", gain_bits,
  "enable_log", enable_log, "scale_shift", scale_shift, "left_context",
  left_context, "right_context", right_context, "frame_stride", frame_stride,
  "zero_padding", zero_padding, "out_scale", out_scale, "out_type", out_type)
  _result = _execute.execute(b"AudioMicrofrontend", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "AudioMicrofrontend", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result
