1"""Python wrappers around TensorFlow ops. 

2 

3This file is MACHINE GENERATED! Do not edit. 

4""" 

5 

6import collections 

7 

8from tensorflow.python import pywrap_tfe as pywrap_tfe 

9from tensorflow.python.eager import context as _context 

10from tensorflow.python.eager import core as _core 

11from tensorflow.python.eager import execute as _execute 

12from tensorflow.python.framework import dtypes as _dtypes 

13from tensorflow.security.fuzzing.py import annotation_types as _atypes 

14 

15from tensorflow.python.framework import op_def_registry as _op_def_registry 

16from tensorflow.python.framework import ops as _ops 

17from tensorflow.python.framework import op_def_library as _op_def_library 

18from tensorflow.python.util.deprecation import deprecated_endpoints 

19from tensorflow.python.util import dispatch as _dispatch 

20from tensorflow.python.util.tf_export import tf_export 

21 

22from typing import TypeVar 

23 
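
# Each wrapper below follows the same generated pattern: in eager mode it
# first attempts the C++ fast path (pywrap_tfe.TFE_Py_FastPathExecute),
# falls back to the matching *_eager_fallback helper when the fast path
# raises _FallbackException, and in graph mode adds a node through
# _op_def_library._apply_op_helper. Ops exported under tf.audio also try
# the type-based and fallback dispatchers before executing.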

def audio_spectrogram(input, window_size, stride, magnitude_squared=False, name=None):
  r"""Produces a visualization of audio data over time.

  Spectrograms are a standard way of representing audio information as a series of
  slices of frequency information, one slice for each window of time. By joining
  these together into a sequence, they form a distinctive fingerprint of the sound
  over time.

  This op expects to receive audio data as an input, stored as floats in the range
  -1 to 1, together with a window width in samples, and a stride specifying how
  far to move the window between slices. From this it generates a three
  dimensional output. The first dimension is for the channels in the input, so a
  stereo audio input would have two here for example. The second dimension is time,
  with successive frequency slices. The third dimension has an amplitude value for
  each frequency during that time slice.

  This means the layout when converted and saved as an image is rotated 90 degrees
  clockwise from a typical spectrogram. Time is descending down the Y axis, and
  the frequency decreases from left to right.

  Each value in the result represents the square root of the sum of the squares of
  the real and imaginary parts of an FFT on the current window of samples. In this
  way, the lowest dimension represents the power of each frequency in the current
  window, and adjacent windows are concatenated in the next dimension.

  To get a more intuitive and visual look at what this operation does, you can run
  tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
  resulting spectrogram as a PNG image.

  Args:
    input: A `Tensor` of type `float32`. Float representation of audio data.
    window_size: An `int`.
      How wide the input window is in samples. For the highest efficiency
      this should be a power of two, but other values are accepted.
    stride: An `int`.
      How widely apart the center of adjacent sample windows should be.
    magnitude_squared: An optional `bool`. Defaults to `False`.
      Whether to return the squared magnitude or just the
      magnitude. Using squared magnitude can avoid extra calculations.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "AudioSpectrogram", name, input, "window_size", window_size,
        "stride", stride, "magnitude_squared", magnitude_squared)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      return audio_spectrogram_eager_fallback(
          input, window_size=window_size, stride=stride,
          magnitude_squared=magnitude_squared, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  window_size = _execute.make_int(window_size, "window_size")
  stride = _execute.make_int(stride, "stride")
  if magnitude_squared is None:
    magnitude_squared = False
  magnitude_squared = _execute.make_bool(magnitude_squared, "magnitude_squared")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "AudioSpectrogram", input=input, window_size=window_size,
                            stride=stride,
                            magnitude_squared=magnitude_squared, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("window_size", _op._get_attr_int("window_size"), "stride",
              _op._get_attr_int("stride"), "magnitude_squared",
              _op._get_attr_bool("magnitude_squared"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "AudioSpectrogram", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

AudioSpectrogram = tf_export("raw_ops.AudioSpectrogram")(_ops.to_raw_op(audio_spectrogram))
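
# A minimal usage sketch for the op above, assuming eager execution in
# TensorFlow 2.x; the input values are illustrative and the shape comment
# follows the docstring:
#
#   import tensorflow as tf
#   samples = tf.random.uniform([16000, 1], minval=-1.0, maxval=1.0)  # [length, channels]
#   spec = tf.raw_ops.AudioSpectrogram(
#       input=samples, window_size=512, stride=256, magnitude_squared=True)
#   # spec has shape [channels, time_slices, frequency_bins].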


def audio_spectrogram_eager_fallback(input, window_size, stride, magnitude_squared, name, ctx):
  window_size = _execute.make_int(window_size, "window_size")
  stride = _execute.make_int(stride, "stride")
  if magnitude_squared is None:
    magnitude_squared = False
  magnitude_squared = _execute.make_bool(magnitude_squared, "magnitude_squared")
  input = _ops.convert_to_tensor(input, _dtypes.float32)
  _inputs_flat = [input]
  _attrs = ("window_size", window_size, "stride", stride,
            "magnitude_squared", magnitude_squared)
  _result = _execute.execute(b"AudioSpectrogram", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "AudioSpectrogram", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

_DecodeWavOutput = collections.namedtuple(
    "DecodeWav",
    ["audio", "sample_rate"])


@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('audio.decode_wav')
def decode_wav(contents, desired_channels=-1, desired_samples=-1, name=None):
  r"""Decode a 16-bit PCM WAV file to a float tensor.

  The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float.

  When desired_channels is set, if the input contains fewer channels than this
  then the last channel will be duplicated to give the requested number, else if
  the input has more channels than requested then the additional channels will be
  ignored.

  If desired_samples is set, then the audio will be cropped or padded with zeroes
  to the requested length.

  The first output contains a Tensor with the content of the audio samples. The
  first dimension is the number of samples and the second is the number of
  channels. For example, a ten-sample-long stereo WAV file gives an output shape
  of [10, 2].

  Args:
    contents: A `Tensor` of type `string`.
      The WAV-encoded audio, usually from a file.
    desired_channels: An optional `int`. Defaults to `-1`.
      Number of sample channels wanted.
    desired_samples: An optional `int`. Defaults to `-1`.
      Length of audio requested.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (audio, sample_rate).

    audio: A `Tensor` of type `float32`.
    sample_rate: A `Tensor` of type `int32`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "DecodeWav", name, contents, "desired_channels",
        desired_channels, "desired_samples", desired_samples)
      _result = _DecodeWavOutput._make(_result)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      _result = _dispatcher_for_decode_wav(
          (contents, desired_channels, desired_samples, name,), None)
      if _result is not NotImplemented:
        return _result
      return decode_wav_eager_fallback(
          contents, desired_channels=desired_channels,
          desired_samples=desired_samples, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      _result = _dispatch.dispatch(
            decode_wav, (), dict(contents=contents,
                                 desired_channels=desired_channels,
                                 desired_samples=desired_samples, name=name)
          )
      if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return _result
      raise
  else:
    _result = _dispatcher_for_decode_wav(
        (contents, desired_channels, desired_samples, name,), None)
    if _result is not NotImplemented:
      return _result
  # Add nodes to the TensorFlow graph.
  if desired_channels is None:
    desired_channels = -1
  desired_channels = _execute.make_int(desired_channels, "desired_channels")
  if desired_samples is None:
    desired_samples = -1
  desired_samples = _execute.make_int(desired_samples, "desired_samples")
  try:
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "DecodeWav", contents=contents, desired_channels=desired_channels,
                     desired_samples=desired_samples, name=name)
  except (TypeError, ValueError):
    _result = _dispatch.dispatch(
          decode_wav, (), dict(contents=contents,
                               desired_channels=desired_channels,
                               desired_samples=desired_samples, name=name)
        )
    if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _result
    raise
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("desired_channels", _op._get_attr_int("desired_channels"),
              "desired_samples", _op._get_attr_int("desired_samples"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "DecodeWav", _inputs_flat, _attrs, _result)
  _result = _DecodeWavOutput._make(_result)
  return _result

DecodeWav = tf_export("raw_ops.DecodeWav")(_ops.to_raw_op(decode_wav))
_dispatcher_for_decode_wav = decode_wav._tf_type_based_dispatcher.Dispatch
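
# A minimal usage sketch for the public endpoint, assuming a 16-bit PCM
# WAV file exists at the hypothetical path below:
#
#   import tensorflow as tf
#   contents = tf.io.read_file("/tmp/example.wav")  # hypothetical path
#   audio, sample_rate = tf.audio.decode_wav(
#       contents, desired_channels=1, desired_samples=16000)
#   # audio: float32 in [-1.0, 1.0], shape [16000, 1]; sample_rate: scalar int32.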


def decode_wav_eager_fallback(contents, desired_channels, desired_samples, name, ctx):
  if desired_channels is None:
    desired_channels = -1
  desired_channels = _execute.make_int(desired_channels, "desired_channels")
  if desired_samples is None:
    desired_samples = -1
  desired_samples = _execute.make_int(desired_samples, "desired_samples")
  contents = _ops.convert_to_tensor(contents, _dtypes.string)
  _inputs_flat = [contents]
  _attrs = ("desired_channels", desired_channels, "desired_samples",
            desired_samples)
  _result = _execute.execute(b"DecodeWav", 2, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "DecodeWav", _inputs_flat, _attrs, _result)
  _result = _DecodeWavOutput._make(_result)
  return _result


@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('audio.encode_wav')
def encode_wav(audio, sample_rate, name=None):
  r"""Encode audio data using the WAV file format.

  This operation will generate a string suitable to be saved out to create a .wav
  audio file. It will be encoded in the 16-bit PCM format. It takes in float
  values in the range -1.0f to 1.0f, and any values outside that range will be
  clamped to it.

  `audio` is a 2-D float Tensor of shape `[length, channels]`.
  `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).

  Args:
    audio: A `Tensor` of type `float32`. 2-D with shape `[length, channels]`.
    sample_rate: A `Tensor` of type `int32`.
      Scalar containing the sample frequency.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "EncodeWav", name, audio, sample_rate)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      _result = _dispatcher_for_encode_wav(
          (audio, sample_rate, name,), None)
      if _result is not NotImplemented:
        return _result
      return encode_wav_eager_fallback(
          audio, sample_rate, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      _result = _dispatch.dispatch(
            encode_wav, (), dict(audio=audio, sample_rate=sample_rate,
                                 name=name)
          )
      if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return _result
      raise
  else:
    _result = _dispatcher_for_encode_wav(
        (audio, sample_rate, name,), None)
    if _result is not NotImplemented:
      return _result
  # Add nodes to the TensorFlow graph.
  try:
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "EncodeWav", audio=audio, sample_rate=sample_rate, name=name)
  except (TypeError, ValueError):
    _result = _dispatch.dispatch(
          encode_wav, (), dict(audio=audio, sample_rate=sample_rate,
                               name=name)
        )
    if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _result
    raise
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ()
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "EncodeWav", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

EncodeWav = tf_export("raw_ops.EncodeWav")(_ops.to_raw_op(encode_wav))
_dispatcher_for_encode_wav = encode_wav._tf_type_based_dispatcher.Dispatch
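
# A minimal round-trip sketch pairing encode_wav with decode_wav, assuming
# eager execution; the tensor values are illustrative:
#
#   import tensorflow as tf
#   audio = tf.random.uniform([16000, 1], minval=-1.0, maxval=1.0)  # [length, channels]
#   wav_bytes = tf.audio.encode_wav(audio, sample_rate=16000)
#   decoded, rate = tf.audio.decode_wav(wav_bytes)
#   # decoded matches audio up to 16-bit quantization; rate == 16000.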


def encode_wav_eager_fallback(audio, sample_rate, name, ctx):
  audio = _ops.convert_to_tensor(audio, _dtypes.float32)
  sample_rate = _ops.convert_to_tensor(sample_rate, _dtypes.int32)
  _inputs_flat = [audio, sample_rate]
  _attrs = None
  _result = _execute.execute(b"EncodeWav", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "EncodeWav", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result


def mfcc(spectrogram, sample_rate, upper_frequency_limit=4000, lower_frequency_limit=20, filterbank_channel_count=40, dct_coefficient_count=13, name=None):
  r"""Transforms a spectrogram into a form that's useful for speech recognition.

  Mel Frequency Cepstral Coefficients are a way of representing audio data that's
  been effective as an input feature for machine learning. They are created by
  taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
  higher frequencies that are less significant to the human ear. They have a long
  history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
  is a good resource to learn more.

  Args:
    spectrogram: A `Tensor` of type `float32`.
      Typically produced by the Spectrogram op, with magnitude_squared
      set to true.
    sample_rate: A `Tensor` of type `int32`.
      The number of samples per second in the source audio.
    upper_frequency_limit: An optional `float`. Defaults to `4000`.
      The highest frequency to use when calculating the
      cepstrum.
    lower_frequency_limit: An optional `float`. Defaults to `20`.
      The lowest frequency to use when calculating the
      cepstrum.
    filterbank_channel_count: An optional `int`. Defaults to `40`.
      Resolution of the Mel bank used internally.
    dct_coefficient_count: An optional `int`. Defaults to `13`.
      How many output channels to produce per time slice.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx, "Mfcc", name, spectrogram, sample_rate, "upper_frequency_limit",
        upper_frequency_limit, "lower_frequency_limit", lower_frequency_limit,
        "filterbank_channel_count", filterbank_channel_count,
        "dct_coefficient_count", dct_coefficient_count)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      return mfcc_eager_fallback(
          spectrogram, sample_rate,
          upper_frequency_limit=upper_frequency_limit,
          lower_frequency_limit=lower_frequency_limit,
          filterbank_channel_count=filterbank_channel_count,
          dct_coefficient_count=dct_coefficient_count, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  if upper_frequency_limit is None:
    upper_frequency_limit = 4000
  upper_frequency_limit = _execute.make_float(upper_frequency_limit, "upper_frequency_limit")
  if lower_frequency_limit is None:
    lower_frequency_limit = 20
  lower_frequency_limit = _execute.make_float(lower_frequency_limit, "lower_frequency_limit")
  if filterbank_channel_count is None:
    filterbank_channel_count = 40
  filterbank_channel_count = _execute.make_int(filterbank_channel_count, "filterbank_channel_count")
  if dct_coefficient_count is None:
    dct_coefficient_count = 13
  dct_coefficient_count = _execute.make_int(dct_coefficient_count, "dct_coefficient_count")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "Mfcc", spectrogram=spectrogram, sample_rate=sample_rate,
                upper_frequency_limit=upper_frequency_limit,
                lower_frequency_limit=lower_frequency_limit,
                filterbank_channel_count=filterbank_channel_count,
                dct_coefficient_count=dct_coefficient_count, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("upper_frequency_limit", _op.get_attr("upper_frequency_limit"),
              "lower_frequency_limit", _op.get_attr("lower_frequency_limit"),
              "filterbank_channel_count",
              _op._get_attr_int("filterbank_channel_count"),
              "dct_coefficient_count",
              _op._get_attr_int("dct_coefficient_count"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "Mfcc", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result

Mfcc = tf_export("raw_ops.Mfcc")(_ops.to_raw_op(mfcc))
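
# A minimal end-to-end sketch chaining the ops in this module, assuming
# eager execution; parameter values are illustrative:
#
#   import tensorflow as tf
#   samples = tf.random.uniform([16000, 1], minval=-1.0, maxval=1.0)
#   spec = tf.raw_ops.AudioSpectrogram(
#       input=samples, window_size=512, stride=256, magnitude_squared=True)
#   mfccs = tf.raw_ops.Mfcc(
#       spectrogram=spec, sample_rate=16000, dct_coefficient_count=13)
#   # mfccs has shape [channels, time_slices, dct_coefficient_count].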


def mfcc_eager_fallback(spectrogram, sample_rate, upper_frequency_limit, lower_frequency_limit, filterbank_channel_count, dct_coefficient_count, name, ctx):
  if upper_frequency_limit is None:
    upper_frequency_limit = 4000
  upper_frequency_limit = _execute.make_float(upper_frequency_limit, "upper_frequency_limit")
  if lower_frequency_limit is None:
    lower_frequency_limit = 20
  lower_frequency_limit = _execute.make_float(lower_frequency_limit, "lower_frequency_limit")
  if filterbank_channel_count is None:
    filterbank_channel_count = 40
  filterbank_channel_count = _execute.make_int(filterbank_channel_count, "filterbank_channel_count")
  if dct_coefficient_count is None:
    dct_coefficient_count = 13
  dct_coefficient_count = _execute.make_int(dct_coefficient_count, "dct_coefficient_count")
  spectrogram = _ops.convert_to_tensor(spectrogram, _dtypes.float32)
  sample_rate = _ops.convert_to_tensor(sample_rate, _dtypes.int32)
  _inputs_flat = [spectrogram, sample_rate]
  _attrs = ("upper_frequency_limit", upper_frequency_limit,
            "lower_frequency_limit", lower_frequency_limit,
            "filterbank_channel_count", filterbank_channel_count,
            "dct_coefficient_count", dct_coefficient_count)
  _result = _execute.execute(b"Mfcc", 1, inputs=_inputs_flat, attrs=_attrs,
                             ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "Mfcc", _inputs_flat, _attrs, _result)
  _result, = _result
  return _result