/src/espeak-ng/src/libespeak-ng/klatt.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2008 by Jonathan Duddington |
3 | | * email: jonsd@users.sourceforge.net |
4 | | * Copyright (C) 2013-2016 Reece H. Dunn |
5 | | * |
6 | | * Based on a re-implementation by: |
7 | | * (c) 1993,94 Jon Iles and Nick Ing-Simmons |
8 | | * of the Klatt cascade-parallel formant synthesizer |
9 | | * |
10 | | * This program is free software; you can redistribute it and/or modify |
11 | | * it under the terms of the GNU General Public License as published by |
12 | | * the Free Software Foundation; either version 3 of the License, or |
13 | | * (at your option) any later version. |
14 | | * |
15 | | * This program is distributed in the hope that it will be useful, |
16 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | | * GNU General Public License for more details. |
19 | | * |
20 | | * You should have received a copy of the GNU General Public License |
21 | | * along with this program; if not, see: <http://www.gnu.org/licenses/>. |
22 | | */ |
23 | | |
24 | | // See URL: ftp://svr-ftp.eng.cam.ac.uk/pub/comp.speech/synthesis/klatt.3.04.tar.gz |
25 | | |
26 | | #include "config.h" |
27 | | |
28 | | #include <math.h> |
29 | | #include <stdint.h> |
30 | | #include <stdio.h> |
31 | | #include <stdlib.h> |
32 | | #include <string.h> |
33 | | |
34 | | #include <espeak-ng/espeak_ng.h> |
35 | | #include <espeak-ng/speak_lib.h> |
36 | | |
37 | | #include "klatt.h" |
38 | | #include "common.h" // for espeak_rand |
39 | | #include "synthesize.h" // for frame_t, WGEN_DATA, STEPSIZE, N_KLATTP, echo... |
40 | | #include "voice.h" // for voice_t, N_PEAKS |
41 | | #if USE_SPEECHPLAYER |
42 | | #include "sPlayer.h" |
43 | | #endif |
44 | | |
45 | | extern unsigned char *out_ptr; |
46 | | extern unsigned char *out_end; |
47 | | static int nsamples; |
48 | | static int sample_count; |
49 | | |
50 | 0 | #define getrandom(min, max) espeak_rand((min), (max)) |
51 | | |
52 | | // function prototypes for functions private to this file |
53 | | |
54 | | static void flutter(klatt_frame_ptr); |
55 | | static double sampled_source(int); |
56 | | static double impulsive_source(void); |
57 | | static double natural_source(void); |
58 | | static void pitch_synch_par_reset(klatt_frame_ptr); |
59 | | static double gen_noise(double); |
60 | | static double DBtoLIN(long); |
61 | | static void frame_init(klatt_frame_ptr); |
62 | | static void setabc(long, long, resonator_ptr); |
63 | | static void SetSynth_Klatt(int length, frame_t *fr1, frame_t *fr2, voice_t *v, int control); |
64 | | static void setzeroabc(long, long, resonator_ptr); |
65 | | |
66 | | static klatt_frame_t kt_frame; |
67 | | static klatt_global_t kt_globals; |
68 | | |
69 | 1 | #define NUMBER_OF_SAMPLES 100 |
70 | | |
71 | | static const int scale_wav_tab[] = { 45, 38, 45, 45, 55, 45 }; // scale output from different voicing sources |
72 | | |
73 | | // For testing, this can be overwritten in KlattInit() |
74 | | static const short natural_samples2[256] = { |
75 | | 2583, 2516, 2450, 2384, 2319, 2254, 2191, 2127, |
76 | | 2067, 2005, 1946, 1890, 1832, 1779, 1726, 1675, |
77 | | 1626, 1579, 1533, 1491, 1449, 1409, 1372, 1336, |
78 | | 1302, 1271, 1239, 1211, 1184, 1158, 1134, 1111, |
79 | | 1089, 1069, 1049, 1031, 1013, 996, 980, 965, |
80 | | 950, 936, 921, 909, 895, 881, 869, 855, |
81 | | 843, 830, 818, 804, 792, 779, 766, 754, |
82 | | 740, 728, 715, 702, 689, 676, 663, 651, |
83 | | 637, 626, 612, 601, 588, 576, 564, 552, |
84 | | 540, 530, 517, 507, 496, 485, 475, 464, |
85 | | 454, 443, 434, 424, 414, 404, 394, 385, |
86 | | 375, 366, 355, 347, 336, 328, 317, 308, |
87 | | 299, 288, 280, 269, 260, 250, 240, 231, |
88 | | 220, 212, 200, 192, 181, 172, 161, 152, |
89 | | 142, 133, 123, 113, 105, 94, 86, 76, |
90 | | 67, 57, 49, 39, 30, 22, 11, 4, |
91 | | -5, -14, -23, -32, -41, -50, -60, -69, |
92 | | -78, -87, -96, -107, -115, -126, -134, -144, |
93 | | -154, -164, -174, -183, -193, -203, -213, -222, |
94 | | -233, -242, -252, -262, -271, -281, -291, -301, |
95 | | -310, -320, -330, -339, -349, -357, -368, -377, |
96 | | -387, -397, -406, -417, -426, -436, -446, -456, |
97 | | -467, -477, -487, -499, -509, -521, -532, -543, |
98 | | -555, -567, -579, -591, -603, -616, -628, -641, |
99 | | -653, -666, -679, -692, -705, -717, -732, -743, |
100 | | -758, -769, -783, -795, -808, -820, -834, -845, |
101 | | -860, -872, -885, -898, -911, -926, -939, -955, |
102 | | -968, -986, -999, -1018, -1034, -1054, -1072, -1094, |
103 | | -1115, -1138, -1162, -1188, -1215, -1244, -1274, -1307, |
104 | | -1340, -1377, -1415, -1453, -1496, -1538, -1584, -1631, |
105 | | -1680, -1732, -1783, -1839, -1894, -1952, -2010, -2072, |
106 | | -2133, -2196, -2260, -2325, -2390, -2456, -2522, -2589, |
107 | | }; |
108 | | static const short natural_samples[100] = { |
109 | | -310, -400, 530, 356, 224, 89, 23, -10, -58, -16, 461, 599, 536, 701, 770, |
110 | | 605, 497, 461, 560, 404, 110, 224, 131, 104, -97, 155, 278, -154, -1165, |
111 | | -598, 737, 125, -592, 41, 11, -247, -10, 65, 92, 80, -304, 71, 167, -1, 122, |
112 | | 233, 161, -43, 278, 479, 485, 407, 266, 650, 134, 80, 236, 68, 260, 269, 179, |
113 | | 53, 140, 275, 293, 296, 104, 257, 152, 311, 182, 263, 245, 125, 314, 140, 44, |
114 | | 203, 230, -235, -286, 23, 107, 92, -91, 38, 464, 443, 176, 98, -784, -2449, |
115 | | -1891, -1045, -1600, -1462, -1384, -1261, -949, -730 |
116 | | }; |
117 | | |
118 | | /* |
119 | | function RESONATOR |
120 | | |
121 | | This is a generic resonator function. Internal memory for the resonator |
122 | | is stored in the globals structure. |
123 | | */ |
124 | | |
125 | | static double resonator(resonator_ptr r, double input) |
126 | 0 | { |
127 | 0 | double x; |
128 | |
|
129 | 0 | x = (double)((double)r->a * (double)input + (double)r->b * (double)r->p1 + (double)r->c * (double)r->p2); |
130 | 0 | r->p2 = (double)r->p1; |
131 | 0 | r->p1 = (double)x; |
132 | |
|
133 | 0 | return (double)x; |
134 | 0 | } |
135 | | |
136 | | /* |
137 | | function ANTIRESONATOR |
138 | | |
139 | | This is a generic anti-resonator function. The code is the same as resonator |
140 | | except that a,b,c need to be set with setzeroabc() and we save inputs in |
141 | | p1/p2 rather than outputs. There is currently only one of these - "rnz" |
142 | | Output = (rnz.a * input) + (rnz.b * oldin1) + (rnz.c * oldin2) |
143 | | */ |
144 | | |
145 | | static double antiresonator(resonator_ptr r, double input) |
146 | 0 | { |
147 | 0 | register double x = (double)r->a * (double)input + (double)r->b * (double)r->p1 + (double)r->c * (double)r->p2; |
148 | 0 | r->p2 = (double)r->p1; |
149 | 0 | r->p1 = (double)input; |
150 | 0 | return (double)x; |
151 | 0 | } |
152 | | |
153 | | /* |
154 | | function FLUTTER |
155 | | |
156 | | This function adds F0 flutter, as specified in: |
157 | | |
158 | | "Analysis, synthesis and perception of voice quality variations among |
159 | | female and male talkers" D.H. Klatt and L.C. Klatt JASA 87(2) February 1990. |
160 | | |
161 | | Flutter is added by applying a quasi-random element constructed from three |
162 | | slowly varying sine waves. |
163 | | */ |
164 | | |
165 | | static void flutter(klatt_frame_ptr frame) |
166 | 0 | { |
167 | 0 | static int time_count; |
168 | 0 | double delta_f0; |
169 | 0 | double fla, flb, flc, fld, fle; |
170 | |
|
171 | 0 | fla = (double)kt_globals.f0_flutter / 50; |
172 | 0 | flb = (double)kt_globals.original_f0 / 100; |
173 | 0 | flc = sin(M_PI*12.7*time_count); // because we are calling flutter() more frequently, every 2.9mS |
174 | 0 | fld = sin(M_PI*7.1*time_count); |
175 | 0 | fle = sin(M_PI*4.7*time_count); |
176 | 0 | delta_f0 = fla * flb * (flc + fld + fle) * 10; |
177 | 0 | frame->F0hz10 = frame->F0hz10 + (long)delta_f0; |
178 | 0 | time_count++; |
179 | 0 | } |
180 | | |
181 | | /* |
182 | | function SAMPLED_SOURCE |
183 | | |
184 | | Allows the use of a glottal excitation waveform sampled from a real |
185 | | voice. |
186 | | */ |
187 | | |
188 | | static double sampled_source(int source_num) |
189 | 0 | { |
190 | 0 | int itemp; |
191 | 0 | double ftemp; |
192 | 0 | double result; |
193 | 0 | double diff_value; |
194 | 0 | int current_value; |
195 | 0 | int next_value; |
196 | 0 | double temp_diff; |
197 | 0 | const short *samples; |
198 | |
|
199 | 0 | if (source_num == 0) { |
200 | 0 | samples = natural_samples; |
201 | 0 | kt_globals.num_samples = 100; |
202 | 0 | } else { |
203 | 0 | samples = natural_samples2; |
204 | 0 | kt_globals.num_samples = 256; |
205 | 0 | } |
206 | |
|
207 | 0 | if (kt_globals.T0 != 0) { |
208 | 0 | ftemp = (double)kt_globals.nper; |
209 | 0 | ftemp = ftemp / kt_globals.T0; |
210 | 0 | ftemp = ftemp * kt_globals.num_samples; |
211 | 0 | itemp = (int)ftemp; |
212 | |
|
213 | 0 | temp_diff = ftemp - (double)itemp; |
214 | |
|
215 | 0 | current_value = samples[(itemp) % kt_globals.num_samples]; |
216 | 0 | next_value = samples[(itemp+1) % kt_globals.num_samples]; |
217 | |
|
218 | 0 | diff_value = (double)next_value - (double)current_value; |
219 | 0 | diff_value = diff_value * temp_diff; |
220 | |
|
221 | 0 | result = samples[(itemp) % kt_globals.num_samples] + diff_value; |
222 | 0 | result = result * kt_globals.sample_factor; |
223 | 0 | } else |
224 | 0 | result = 0; |
225 | 0 | return result; |
226 | 0 | } |
227 | | |
228 | | /* |
229 | | function PARWAVE |
230 | | |
231 | | Converts synthesis parameters to a waveform. |
232 | | */ |
233 | | |
234 | | static int parwave(klatt_frame_ptr frame, WGEN_DATA *wdata) |
235 | 0 | { |
236 | 0 | double temp; |
237 | 0 | int value; |
238 | 0 | double outbypas; |
239 | 0 | double out; |
240 | 0 | long n4; |
241 | 0 | double frics; |
242 | 0 | double glotout; |
243 | 0 | double aspiration; |
244 | 0 | double casc_next_in; |
245 | 0 | double par_glotout; |
246 | 0 | static double noise; |
247 | 0 | static double voice; |
248 | 0 | static double vlast; |
249 | 0 | static double glotlast; |
250 | 0 | static double sourc; |
251 | 0 | int ix; |
252 | |
|
253 | 0 | flutter(frame); // add f0 flutter |
254 | | |
255 | | // MAIN LOOP, for each output sample of current frame: |
256 | |
|
257 | 0 | for (kt_globals.ns = 0; kt_globals.ns < kt_globals.nspfr; kt_globals.ns++) { |
258 | | // Get low-passed random number for aspiration and frication noise |
259 | 0 | noise = gen_noise(noise); |
260 | | |
261 | | // Amplitude modulate noise (reduce noise amplitude during |
262 | | // second half of glottal period) if voicing simultaneously present. |
263 | |
|
264 | 0 | if (kt_globals.nper > kt_globals.nmod) |
265 | 0 | noise *= (double)0.5; |
266 | | |
267 | | // Compute frication noise |
268 | 0 | frics = kt_globals.amp_frica * noise; |
269 | | |
270 | | // Compute voicing waveform. Run glottal source simulation at 4 |
271 | | // times normal sample rate to minimize quantization noise in |
272 | | // period of female voice. |
273 | |
|
274 | 0 | for (n4 = 0; n4 < 4; n4++) { |
275 | 0 | switch (kt_globals.glsource) |
276 | 0 | { |
277 | 0 | case IMPULSIVE: |
278 | 0 | voice = impulsive_source(); |
279 | 0 | break; |
280 | 0 | case NATURAL: |
281 | 0 | voice = natural_source(); |
282 | 0 | break; |
283 | 0 | case SAMPLED: |
284 | 0 | voice = sampled_source(0); |
285 | 0 | break; |
286 | 0 | case SAMPLED2: |
287 | 0 | voice = sampled_source(1); |
288 | 0 | break; |
289 | 0 | } |
290 | | |
291 | | // Reset period when counter 'nper' reaches T0 |
292 | 0 | if (kt_globals.nper >= kt_globals.T0) { |
293 | 0 | kt_globals.nper = 0; |
294 | 0 | pitch_synch_par_reset(frame); |
295 | 0 | } |
296 | | |
297 | | // Low-pass filter voicing waveform before downsampling from 4*samrate |
298 | | // to samrate samples/sec. Resonator f=.09*samrate, bw=.06*samrate |
299 | |
|
300 | 0 | voice = resonator(&(kt_globals.rsn[RLP]), voice); |
301 | | |
302 | | // Increment counter that keeps track of 4*samrate samples per sec |
303 | 0 | kt_globals.nper++; |
304 | 0 | } |
305 | | |
306 | 0 | if(kt_globals.glsource==5) { |
307 | 0 | double v=(kt_globals.nper/(double)kt_globals.T0); |
308 | 0 | v=(v*2)-1; |
309 | 0 | voice=v*6000; |
310 | 0 | } |
311 | | |
312 | | // Tilt spectrum of voicing source down by soft low-pass filtering, amount |
313 | | // of tilt determined by TLTdb |
314 | |
|
315 | 0 | voice = (voice * kt_globals.onemd) + (vlast * kt_globals.decay); |
316 | 0 | vlast = voice; |
317 | | |
318 | | // Add breathiness during glottal open phase. Amount of breathiness |
319 | | // determined by parameter Aturb Use nrand rather than noise because |
320 | | // noise is low-passed. |
321 | |
|
322 | 0 | if (kt_globals.nper < kt_globals.nopen) |
323 | 0 | voice += kt_globals.amp_breth * kt_globals.nrand; |
324 | | |
325 | | // Set amplitude of voicing |
326 | 0 | glotout = kt_globals.amp_voice * voice; |
327 | 0 | par_glotout = kt_globals.par_amp_voice * voice; |
328 | | |
329 | | // Compute aspiration amplitude and add to voicing source |
330 | 0 | aspiration = kt_globals.amp_aspir * noise; |
331 | 0 | glotout += aspiration; |
332 | |
|
333 | 0 | par_glotout += aspiration; |
334 | | |
335 | | // Cascade vocal tract, excited by laryngeal sources. |
336 | | // Nasal antiresonator, then formants FNP, F5, F4, F3, F2, F1 |
337 | |
|
338 | 0 | out = 0; |
339 | 0 | if (kt_globals.synthesis_model != ALL_PARALLEL) { |
340 | 0 | casc_next_in = antiresonator(&(kt_globals.rsn[Rnz]), glotout); |
341 | 0 | casc_next_in = resonator(&(kt_globals.rsn[Rnpc]), casc_next_in); |
342 | 0 | casc_next_in = resonator(&(kt_globals.rsn[R8c]), casc_next_in); |
343 | 0 | casc_next_in = resonator(&(kt_globals.rsn[R7c]), casc_next_in); |
344 | 0 | casc_next_in = resonator(&(kt_globals.rsn[R6c]), casc_next_in); |
345 | 0 | casc_next_in = resonator(&(kt_globals.rsn[R5c]), casc_next_in); |
346 | 0 | casc_next_in = resonator(&(kt_globals.rsn[R4c]), casc_next_in); |
347 | 0 | casc_next_in = resonator(&(kt_globals.rsn[R3c]), casc_next_in); |
348 | 0 | casc_next_in = resonator(&(kt_globals.rsn[R2c]), casc_next_in); |
349 | 0 | out = resonator(&(kt_globals.rsn[R1c]), casc_next_in); |
350 | 0 | } |
351 | | |
352 | | // Excite parallel F1 and FNP by voicing waveform |
353 | 0 | sourc = par_glotout; // Source is voicing plus aspiration |
354 | | |
355 | | // Standard parallel vocal tract Formants F6,F5,F4,F3,F2, |
356 | | // outputs added with alternating sign. Sound source for other |
357 | | // parallel resonators is frication plus first difference of |
358 | | // voicing waveform. |
359 | |
|
360 | 0 | out += resonator(&(kt_globals.rsn[R1p]), sourc); |
361 | 0 | out += resonator(&(kt_globals.rsn[Rnpp]), sourc); |
362 | |
|
363 | 0 | sourc = frics + par_glotout - glotlast; |
364 | 0 | glotlast = par_glotout; |
365 | |
|
366 | 0 | for (ix = R2p; ix <= R6p; ix++) |
367 | 0 | out = resonator(&(kt_globals.rsn[ix]), sourc) - out; |
368 | |
|
369 | 0 | outbypas = kt_globals.amp_bypas * sourc; |
370 | |
|
371 | 0 | out = outbypas - out; |
372 | |
|
373 | 0 | out = resonator(&(kt_globals.rsn[Rout]), out); |
374 | 0 | temp = (int)(out * wdata->amplitude * kt_globals.amp_gain0); // Convert back to integer |
375 | | |
376 | | // mix with a recorded WAV if required for this phoneme |
377 | 0 | signed char c; |
378 | 0 | int sample; |
379 | |
|
380 | 0 | if (wdata->mix_wavefile_ix < wdata->n_mix_wavefile) { |
381 | 0 | if (wdata->mix_wave_scale == 0) { |
382 | | // a 16 bit sample |
383 | 0 | c = wdata->mix_wavefile[wdata->mix_wavefile_ix+1]; |
384 | 0 | sample = wdata->mix_wavefile[wdata->mix_wavefile_ix] + (c * 256); |
385 | 0 | wdata->mix_wavefile_ix += 2; |
386 | 0 | } else { |
387 | | // a 8 bit sample, scaled |
388 | 0 | sample = (signed char)wdata->mix_wavefile[wdata->mix_wavefile_ix++] * wdata->mix_wave_scale; |
389 | 0 | } |
390 | 0 | int z2 = sample * wdata->amplitude_v / 1024; |
391 | 0 | z2 = (z2 * wdata->mix_wave_amp)/40; |
392 | 0 | temp += z2; |
393 | 0 | } |
394 | |
|
395 | 0 | if (kt_globals.fadein < 64) { |
396 | 0 | temp = (temp * kt_globals.fadein) / 64; |
397 | 0 | ++kt_globals.fadein; |
398 | 0 | } |
399 | | |
400 | | // if fadeout is set, fade to zero over 64 samples, to avoid clicks at end of synthesis |
401 | 0 | if (kt_globals.fadeout > 0) { |
402 | 0 | kt_globals.fadeout--; |
403 | 0 | temp = (temp * kt_globals.fadeout) / 64; |
404 | 0 | if (kt_globals.fadeout == 0) |
405 | 0 | kt_globals.fadein = 0; |
406 | 0 | } |
407 | |
|
408 | 0 | value = (int)temp + ((echo_buf[echo_tail++]*echo_amp) >> 8); |
409 | 0 | if (echo_tail >= N_ECHO_BUF) |
410 | 0 | echo_tail = 0; |
411 | |
|
412 | 0 | if (value < -32768) |
413 | 0 | value = -32768; |
414 | |
|
415 | 0 | if (value > 32767) |
416 | 0 | value = 32767; |
417 | |
|
418 | 0 | *out_ptr++ = value; |
419 | 0 | *out_ptr++ = value >> 8; |
420 | |
|
421 | 0 | echo_buf[echo_head++] = value; |
422 | 0 | if (echo_head >= N_ECHO_BUF) |
423 | 0 | echo_head = 0; |
424 | |
|
425 | 0 | sample_count++; |
426 | 0 | if (out_ptr + 2 > out_end) |
427 | 0 | return 1; |
428 | 0 | } |
429 | 0 | return 0; |
430 | 0 | } |
431 | | |
432 | | void KlattReset(int control) |
433 | 5.14M | { |
434 | 5.14M | int r_ix; |
435 | | |
436 | | #if USE_SPEECHPLAYER |
437 | | KlattResetSP(); |
438 | | #endif |
439 | | |
440 | 5.14M | if (control == 2) { |
441 | | // Full reset |
442 | 1 | kt_globals.FLPhz = (950 * kt_globals.samrate) / 10000; |
443 | 1 | kt_globals.BLPhz = (630 * kt_globals.samrate) / 10000; |
444 | 1 | kt_globals.minus_pi_t = -M_PI / kt_globals.samrate; |
445 | 1 | kt_globals.two_pi_t = -2.0 * kt_globals.minus_pi_t; |
446 | 1 | setabc(kt_globals.FLPhz, kt_globals.BLPhz, &(kt_globals.rsn[RLP])); |
447 | 1 | } |
448 | | |
449 | 5.14M | if (control > 0) { |
450 | 5.14M | kt_globals.nper = 0; |
451 | 5.14M | kt_globals.T0 = 0; |
452 | 5.14M | kt_globals.nopen = 0; |
453 | 5.14M | kt_globals.nmod = 0; |
454 | | |
455 | 20.5M | for (r_ix = RGL; r_ix < N_RSN; r_ix++) { |
456 | 15.4M | kt_globals.rsn[r_ix].p1 = 0; |
457 | 15.4M | kt_globals.rsn[r_ix].p2 = 0; |
458 | 15.4M | } |
459 | 5.14M | } |
460 | | |
461 | 92.5M | for (r_ix = 0; r_ix <= R6p; r_ix++) { |
462 | 87.3M | kt_globals.rsn[r_ix].p1 = 0; |
463 | 87.3M | kt_globals.rsn[r_ix].p2 = 0; |
464 | 87.3M | } |
465 | 5.14M | } |
466 | | |
// Shut down the Klatt synthesizer.
//
// Only forwards to KlattFiniSP() when the build defines USE_SPEECHPLAYER;
// otherwise this function does nothing.
void KlattFini(void)
{
#if USE_SPEECHPLAYER
	KlattFiniSP();
#endif
}
473 | | |
474 | | /* |
475 | | function FRAME_INIT |
476 | | |
477 | | Use parameters from the input frame to set up resonator coefficients. |
478 | | */ |
479 | | |
480 | | static void frame_init(klatt_frame_ptr frame) |
481 | 0 | { |
482 | 0 | double amp_par[7]; |
483 | 0 | static const double amp_par_factor[7] = { 0.6, 0.4, 0.15, 0.06, 0.04, 0.022, 0.03 }; |
484 | 0 | long Gain0_tmp; |
485 | 0 | int ix; |
486 | |
|
487 | 0 | kt_globals.original_f0 = frame->F0hz10 / 10; |
488 | |
|
489 | 0 | frame->AVdb_tmp = frame->AVdb - 7; |
490 | 0 | if (frame->AVdb_tmp < 0) |
491 | 0 | frame->AVdb_tmp = 0; |
492 | |
|
493 | 0 | kt_globals.amp_aspir = DBtoLIN(frame->ASP) * 0.05; |
494 | 0 | kt_globals.amp_frica = DBtoLIN(frame->AF) * 0.25; |
495 | 0 | kt_globals.par_amp_voice = DBtoLIN(frame->AVpdb); |
496 | 0 | kt_globals.amp_bypas = DBtoLIN(frame->AB) * 0.05; |
497 | |
|
498 | 0 | for (ix = 0; ix <= 6; ix++) { |
499 | | // parallel amplitudes F1 to F6, and parallel nasal pole |
500 | 0 | amp_par[ix] = DBtoLIN(frame->Ap[ix]) * amp_par_factor[ix]; |
501 | 0 | } |
502 | |
|
503 | 0 | Gain0_tmp = frame->Gain0 - 3; |
504 | 0 | if (Gain0_tmp <= 0) |
505 | 0 | Gain0_tmp = 57; |
506 | 0 | kt_globals.amp_gain0 = DBtoLIN(Gain0_tmp) / kt_globals.scale_wav; |
507 | | |
508 | | // Set coefficients of variable cascade resonators |
509 | 0 | for (ix = 1; ix <= 9; ix++) { |
510 | | // formants 1 to 8, plus nasal pole |
511 | 0 | setabc(frame->Fhz[ix], frame->Bhz[ix], &(kt_globals.rsn[ix])); |
512 | |
|
513 | 0 | if (ix <= 5) { |
514 | 0 | setabc(frame->Fhz_next[ix], frame->Bhz_next[ix], &(kt_globals.rsn_next[ix])); |
515 | |
|
516 | 0 | kt_globals.rsn[ix].a_inc = (kt_globals.rsn_next[ix].a - kt_globals.rsn[ix].a) / 64.0; |
517 | 0 | kt_globals.rsn[ix].b_inc = (kt_globals.rsn_next[ix].b - kt_globals.rsn[ix].b) / 64.0; |
518 | 0 | kt_globals.rsn[ix].c_inc = (kt_globals.rsn_next[ix].c - kt_globals.rsn[ix].c) / 64.0; |
519 | 0 | } |
520 | 0 | } |
521 | | |
522 | | // nasal zero anti-resonator |
523 | 0 | setzeroabc(frame->Fhz[F_NZ], frame->Bhz[F_NZ], &(kt_globals.rsn[Rnz])); |
524 | 0 | setzeroabc(frame->Fhz_next[F_NZ], frame->Bhz_next[F_NZ], &(kt_globals.rsn_next[Rnz])); |
525 | 0 | kt_globals.rsn[F_NZ].a_inc = (kt_globals.rsn_next[F_NZ].a - kt_globals.rsn[F_NZ].a) / 64.0; |
526 | 0 | kt_globals.rsn[F_NZ].b_inc = (kt_globals.rsn_next[F_NZ].b - kt_globals.rsn[F_NZ].b) / 64.0; |
527 | 0 | kt_globals.rsn[F_NZ].c_inc = (kt_globals.rsn_next[F_NZ].c - kt_globals.rsn[F_NZ].c) / 64.0; |
528 | | |
529 | | // Set coefficients of parallel resonators, and amplitude of outputs |
530 | |
|
531 | 0 | for (ix = 0; ix <= 6; ix++) { |
532 | 0 | setabc(frame->Fhz[ix], frame->Bphz[ix], &(kt_globals.rsn[Rparallel+ix])); |
533 | 0 | kt_globals.rsn[Rparallel+ix].a *= amp_par[ix]; |
534 | 0 | } |
535 | | |
536 | | // output low-pass filter |
537 | |
|
538 | 0 | setabc((long)0.0, (long)(kt_globals.samrate/2), &(kt_globals.rsn[Rout])); |
539 | 0 | } |
540 | | |
541 | | /* |
542 | | function IMPULSIVE_SOURCE |
543 | | |
544 | | Generate a low pass filtered train of impulses as an approximation of |
545 | | a natural excitation waveform. Low-pass filter the differentiated impulse |
546 | | with a critically-damped second-order filter, time constant proportional |
547 | | to Kopen. |
548 | | */ |
549 | | |
550 | | static double impulsive_source(void) |
551 | 0 | { |
552 | 0 | static const double doublet[] = { 0.0, 13000000.0, -13000000.0 }; |
553 | 0 | static double vwave; |
554 | |
|
555 | 0 | if (kt_globals.nper < 3) |
556 | 0 | vwave = doublet[kt_globals.nper]; |
557 | 0 | else |
558 | 0 | vwave = 0.0; |
559 | |
|
560 | 0 | return resonator(&(kt_globals.rsn[RGL]), vwave); |
561 | 0 | } |
562 | | |
563 | | /* |
564 | | function NATURAL_SOURCE |
565 | | |
566 | | Vwave is the differentiated glottal flow waveform, there is a weak |
567 | | spectral zero around 800 Hz, magic constants a,b reset pitch synchronously. |
568 | | */ |
569 | | |
570 | | static double natural_source(void) |
571 | 0 | { |
572 | 0 | double lgtemp; |
573 | 0 | static double vwave; |
574 | |
|
575 | 0 | if (kt_globals.nper < kt_globals.nopen) { |
576 | 0 | kt_globals.pulse_shape_a -= kt_globals.pulse_shape_b; |
577 | 0 | vwave += kt_globals.pulse_shape_a; |
578 | 0 | lgtemp = vwave * 0.028; |
579 | |
|
580 | 0 | return lgtemp; |
581 | 0 | } |
582 | 0 | vwave = 0.0; |
583 | 0 | return 0.0; |
584 | 0 | } |
585 | | |
586 | | /* |
587 | | function PITCH_SYNCH_PAR_RESET
588 | | |
589 | | Reset selected parameters pitch-synchronously. |
590 | | |
591 | | |
592 | | Constant B0 controls shape of glottal pulse as a function |
593 | | of desired duration of open phase N0 |
594 | | (Note that N0 is specified in terms of 40,000 samples/sec of speech) |
595 | | |
596 | | Assume voicing waveform V(t) has form: k1 t**2 - k2 t**3 |
597 | | |
598 | | If the radiation characteristic, a temporal derivative
599 | | is folded in, and we go from continuous time to discrete |
600 | | integers n: dV/dt = vwave[n] |
601 | | = sum over i=1,2,...,n of { a - (i * b) } |
602 | | = a n - b/2 n**2 |
603 | | |
604 | | where the constants a and b control the detailed shape |
605 | | and amplitude of the voicing waveform over the open |
606 | | portion of the voicing cycle "nopen".
607 | | |
608 | | Let integral of dV/dt have no net dc flow --> a = (b * nopen) / 3 |
609 | | |
610 | | Let maximum of dUg(n)/dn be constant --> b = gain / (nopen * nopen) |
611 | | meaning as nopen gets bigger, V has bigger peak proportional to n |
612 | | |
613 | | Thus, to generate the table below for 40 <= nopen <= 263: |
614 | | |
615 | | B0[nopen - 40] = 1920000 / (nopen * nopen) |
616 | | */ |
617 | | |
618 | | static void pitch_synch_par_reset(klatt_frame_ptr frame) |
619 | 0 | { |
620 | 0 | long temp; |
621 | 0 | double temp1; |
622 | 0 | static long skew; |
623 | 0 | static const short B0[224] = { |
624 | 0 | 1200, 1142, 1088, 1038, 991, 948, 907, 869, 833, 799, 768, 738, 710, 683, 658, |
625 | 0 | 634, 612, 590, 570, 551, 533, 515, 499, 483, 468, 454, 440, 427, 415, 403, |
626 | 0 | 391, 380, 370, 360, 350, 341, 332, 323, 315, 307, 300, 292, 285, 278, 272, |
627 | 0 | 265, 259, 253, 247, 242, 237, 231, 226, 221, 217, 212, 208, 204, 199, 195, |
628 | 0 | 192, 188, 184, 180, 177, 174, 170, 167, 164, 161, 158, 155, 153, 150, 147, |
629 | 0 | 145, 142, 140, 137, 135, 133, 131, 128, 126, 124, 122, 120, 119, 117, 115, |
630 | 0 | 113, 111, 110, 108, 106, 105, 103, 102, 100, 99, 97, 96, 95, 93, 92, 91, 90, |
631 | 0 | 88, 87, 86, 85, 84, 83, 82, 80, 79, 78, 77, 76, 75, 75, 74, 73, 72, 71, |
632 | 0 | 70, 69, 68, 68, 67, 66, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 57, |
633 | 0 | 57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48, |
634 | 0 | 47, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 40, 40, |
635 | 0 | 39, 39, 38, 38, 38, 38, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 34, 34, 33, |
636 | 0 | 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, |
637 | 0 | 28, 28, 28, 28, 27, 27 |
638 | 0 | }; |
639 | |
|
640 | 0 | if (frame->F0hz10 > 0) { |
641 | | // T0 is 4* the number of samples in one pitch period |
642 | |
|
643 | 0 | kt_globals.T0 = (40 * kt_globals.samrate) / frame->F0hz10; |
644 | |
|
645 | 0 | kt_globals.amp_voice = DBtoLIN(frame->AVdb_tmp); |
646 | | |
647 | | // Duration of period before amplitude modulation |
648 | |
|
649 | 0 | kt_globals.nmod = kt_globals.T0; |
650 | 0 | if (frame->AVdb_tmp > 0) |
651 | 0 | kt_globals.nmod >>= 1; |
652 | | |
653 | | // Breathiness of voicing waveform |
654 | |
|
655 | 0 | kt_globals.amp_breth = DBtoLIN(frame->Aturb) * 0.1; |
656 | | |
657 | | // Set open phase of glottal period where 40 <= open phase <= 263 |
658 | |
|
659 | 0 | kt_globals.nopen = 4 * frame->Kopen; |
660 | |
|
661 | 0 | if ((kt_globals.glsource == IMPULSIVE) && (kt_globals.nopen > 263)) |
662 | 0 | kt_globals.nopen = 263; |
663 | |
|
664 | 0 | if (kt_globals.nopen >= (kt_globals.T0-1)) |
665 | 0 | kt_globals.nopen = kt_globals.T0 - 2; |
666 | |
|
667 | 0 | if (kt_globals.nopen < 40) { |
668 | | // F0 max = 1000 Hz |
669 | 0 | kt_globals.nopen = 40; |
670 | 0 | } |
671 | | |
672 | | // Reset a & b, which determine shape of "natural" glottal waveform |
673 | |
|
674 | 0 | kt_globals.pulse_shape_b = B0[kt_globals.nopen-40]; |
675 | 0 | kt_globals.pulse_shape_a = (kt_globals.pulse_shape_b * kt_globals.nopen) * 0.333; |
676 | | |
677 | | // Reset width of "impulsive" glottal pulse |
678 | |
|
679 | 0 | temp = kt_globals.samrate / kt_globals.nopen; |
680 | |
|
681 | 0 | setabc((long)0, temp, &(kt_globals.rsn[RGL])); |
682 | | |
683 | | // Make gain at F1 about constant |
684 | |
|
685 | 0 | temp1 = kt_globals.nopen *.00833; |
686 | 0 | kt_globals.rsn[RGL].a *= temp1 * temp1; |
687 | | |
688 | | // Truncate skewness so as not to exceed duration of closed phase |
689 | | // of glottal period. |
690 | |
|
691 | 0 | temp = kt_globals.T0 - kt_globals.nopen; |
692 | 0 | if (frame->Kskew > temp) |
693 | 0 | frame->Kskew = temp; |
694 | 0 | if (skew >= 0) |
695 | 0 | skew = frame->Kskew; |
696 | 0 | else |
697 | 0 | skew = -frame->Kskew; |
698 | | |
699 | | // Add skewness to closed portion of voicing period |
700 | 0 | kt_globals.T0 = kt_globals.T0 + skew; |
701 | 0 | skew = -skew; |
702 | 0 | } else { |
703 | 0 | kt_globals.T0 = 4; // Default for f0 undefined |
704 | 0 | kt_globals.amp_voice = 0.0; |
705 | 0 | kt_globals.nmod = kt_globals.T0; |
706 | 0 | kt_globals.amp_breth = 0.0; |
707 | 0 | kt_globals.pulse_shape_a = 0.0; |
708 | 0 | kt_globals.pulse_shape_b = 0.0; |
709 | 0 | } |
710 | | |
711 | | // Reset these pars pitch synchronously or at update rate if f0=0 |
712 | |
|
713 | 0 | if ((kt_globals.T0 != 4) || (kt_globals.ns == 0)) { |
714 | | // Set one-pole low-pass filter that tilts glottal source |
715 | |
|
716 | 0 | kt_globals.decay = (0.033 * frame->TLTdb); |
717 | |
|
718 | 0 | if (kt_globals.decay > 0.0) |
719 | 0 | kt_globals.onemd = 1.0 - kt_globals.decay; |
720 | 0 | else |
721 | 0 | kt_globals.onemd = 1.0; |
722 | 0 | } |
723 | 0 | } |
724 | | |
725 | | /* |
726 | | function SETABC |
727 | | |
728 | | Convert formant frequencies and bandwidth into resonator difference |
729 | | equation constants. |
730 | | */ |
731 | | |
732 | | static void setabc(long int f, long int bw, resonator_ptr rp) |
733 | 1 | { |
734 | 1 | double r; |
735 | 1 | double arg; |
736 | | |
737 | | // Let r = exp(-pi bw t) |
738 | 1 | arg = kt_globals.minus_pi_t * bw; |
739 | 1 | r = exp(arg); |
740 | | |
741 | | // Let c = -r**2 |
742 | 1 | rp->c = -(r * r); |
743 | | |
744 | | // Let b = r * 2*cos(2 pi f t) |
745 | 1 | arg = kt_globals.two_pi_t * f; |
746 | 1 | rp->b = r * cos(arg) * 2.0; |
747 | | |
748 | | // Let a = 1.0 - b - c |
749 | 1 | rp->a = 1.0 - rp->b - rp->c; |
750 | 1 | } |
751 | | |
752 | | /* |
753 | | function SETZEROABC |
754 | | |
755 | | Convert formant frequencies and bandwidth into anti-resonator difference |
756 | | equation constants. |
757 | | */ |
758 | | |
759 | | static void setzeroabc(long int f, long int bw, resonator_ptr rp) |
760 | 0 | { |
761 | 0 | double r; |
762 | 0 | double arg; |
763 | |
|
764 | 0 | f = -f; |
765 | | |
766 | | // First compute ordinary resonator coefficients |
767 | | // Let r = exp(-pi bw t) |
768 | 0 | arg = kt_globals.minus_pi_t * bw; |
769 | 0 | r = exp(arg); |
770 | | |
771 | | // Let c = -r**2 |
772 | 0 | rp->c = -(r * r); |
773 | | |
774 | | // Let b = r * 2*cos(2 pi f t) |
775 | 0 | arg = kt_globals.two_pi_t * f; |
776 | 0 | rp->b = r * cos(arg) * 2.; |
777 | | |
778 | | // Let a = 1.0 - b - c |
779 | 0 | rp->a = 1.0 - rp->b - rp->c; |
780 | | |
781 | | // Now convert to antiresonator coefficients (a'=1/a, b'=b/a, c'=c/a) |
782 | | |
783 | | // If f == 0 then rp->a gets set to 0 which makes a'=1/a set a', b' and c' to |
784 | | // INF, causing an audible sound spike when triggered (e.g. apiration with the |
785 | | // nasal register set to f=0, bw=0). |
786 | 0 | if (rp->a != 0) { |
787 | | // Now convert to antiresonator coefficients (a'=1/a, b'=b/a, c'=c/a) |
788 | 0 | rp->a = 1.0 / rp->a; |
789 | 0 | rp->c *= -rp->a; |
790 | 0 | rp->b *= -rp->a; |
791 | 0 | } |
792 | 0 | } |
793 | | |
794 | | /* |
795 | | function GEN_NOISE |
796 | | |
797 | | Random number generator (return a number between -8191 and +8191) |
798 | | Noise spectrum is tilted down by soft low-pass filter having a pole near |
799 | | the origin in the z-plane, i.e. output = input + (0.75 * lastoutput) |
800 | | */ |
801 | | |
802 | | static double gen_noise(double noise) |
803 | 0 | { |
804 | 0 | long temp; |
805 | 0 | static double nlast; |
806 | |
|
807 | 0 | temp = (long)getrandom(-8191, 8191); |
808 | 0 | kt_globals.nrand = (long)temp; |
809 | |
|
810 | 0 | noise = kt_globals.nrand + (0.75 * nlast); |
811 | 0 | nlast = noise; |
812 | |
|
813 | 0 | return noise; |
814 | 0 | } |
815 | | |
816 | | /* |
817 | | function DBTOLIN |
818 | | |
819 | | Convert from decibels to a linear scale factor |
820 | | |
821 | | |
822 | | Conversion table, db to linear, 87 dB --> 32767 |
823 | | 86 dB --> 29491 (1 dB down = 0.5**1/6) |
824 | | ... |
825 | | 81 dB --> 16384 (6 dB down = 0.5) |
826 | | ... |
827 | | 0 dB --> 0 |
828 | | |
829 | | The just noticeable difference for a change in intensity of a vowel |
830 | | is approximately 1 dB. Thus all amplitudes are quantized to 1 dB |
831 | | steps. |
832 | | */ |
833 | | |
// Convert from decibels to a linear scale factor.
//
// dB: level in whole decibels; the table covers 0..87
//
// Returns amptable[dB] * 0.001, or 0 when dB is outside 0..87.
//
// The table doubles every 6 dB (e.g. 81 dB -> 16384, 87 dB -> 32767).
// Entry 55 is 811 to keep that progression: it is half of entry 61 (1622)
// and twice entry 49 (405, rounded). It previously read 881.
static double DBtoLIN(long dB)
{
	static const short amptable[88] = {
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7,
		8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 25, 28, 32,
		35, 40, 45, 51, 57, 64, 71, 80, 90, 101, 114, 128,
		142, 159, 179, 202, 227, 256, 284, 318, 359, 405,
		455, 512, 568, 638, 719, 811, 911, 1024, 1137, 1276,
		1438, 1622, 1823, 2048, 2273, 2552, 2875, 3244, 3645,
		4096, 4547, 5104, 5751, 6488, 7291, 8192, 9093, 10207,
		11502, 12976, 14582, 16384, 18350, 20644, 23429,
		26214, 29491, 32767
	};

	if ((dB < 0) || (dB > 87))
		return 0;

	return (double)(amptable[dB]) * 0.001;
}
853 | | |
/*
   Interpolation state shared by SetSynth_Klatt() (which loads it) and
   Wavegen_Klatt() (which steps it once per synthesis step).

   peaks       per-formant state; each entry holds an integer value, a running
               double and a per-step increment for freq, bw, bp and ap
   end_wave    set by SetSynth_Klatt(); a value > 0 makes Wavegen_Klatt() add a
               fade-out after the last sample
   klattp      current integer values of the extra Klatt parameters
   klattp1     running (double) values that klattp[] is truncated from
   klattp_inc  amount added to klattp1[] on every step
 */
854 | | static klatt_peaks_t peaks[N_PEAKS]; |
855 | | static int end_wave; |
856 | | static int klattp[N_KLATTP]; |
857 | | static double klattp1[N_KLATTP]; |
858 | | static double klattp_inc[N_KLATTP]; |
859 | | |
860 | | int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice) |
861 | 0 | { |
862 | | #if USE_SPEECHPLAYER |
863 | | if(wvoice->klattv[0] == 6) |
864 | | return Wavegen_KlattSP(wdata, wvoice, length, resume, fr1, fr2); |
865 | | #endif |
866 | |
|
867 | 0 | if (resume == 0) |
868 | 0 | SetSynth_Klatt(length, fr1, fr2, wvoice, 1); |
869 | |
|
870 | 0 | int pk; |
871 | 0 | int x; |
872 | 0 | int ix; |
873 | 0 | int fade; |
874 | |
|
875 | 0 | if (resume == 0) |
876 | 0 | sample_count = 0; |
877 | |
|
878 | 0 | while (sample_count < nsamples) { |
879 | 0 | kt_frame.F0hz10 = (wdata->pitch * 10) / 4096; |
880 | | |
881 | | // formants F6,F7,F8 are fixed values for cascade resonators, set in KlattInit() |
882 | | // but F6 is used for parallel resonator |
883 | | // F0 is used for the nasal zero |
884 | 0 | for (ix = 0; ix < 6; ix++) { |
885 | 0 | kt_frame.Fhz[ix] = peaks[ix].freq; |
886 | 0 | if (ix < 4) |
887 | 0 | kt_frame.Bhz[ix] = peaks[ix].bw; |
888 | 0 | } |
889 | 0 | for (ix = 1; ix < 7; ix++) |
890 | 0 | kt_frame.Ap[ix] = peaks[ix].ap; |
891 | |
|
892 | 0 | kt_frame.AVdb = klattp[KLATT_AV]; |
893 | 0 | kt_frame.AVpdb = klattp[KLATT_AVp]; |
894 | 0 | kt_frame.AF = klattp[KLATT_Fric]; |
895 | 0 | kt_frame.AB = klattp[KLATT_FricBP]; |
896 | 0 | kt_frame.ASP = klattp[KLATT_Aspr]; |
897 | 0 | kt_frame.Aturb = klattp[KLATT_Turb]; |
898 | 0 | kt_frame.Kskew = klattp[KLATT_Skew]; |
899 | 0 | kt_frame.TLTdb = klattp[KLATT_Tilt]; |
900 | 0 | kt_frame.Kopen = klattp[KLATT_Kopen]; |
901 | | |
902 | | // advance formants |
903 | 0 | for (pk = 0; pk < N_PEAKS; pk++) { |
904 | 0 | peaks[pk].freq1 += peaks[pk].freq_inc; |
905 | 0 | peaks[pk].freq = (int)peaks[pk].freq1; |
906 | 0 | peaks[pk].bw1 += peaks[pk].bw_inc; |
907 | 0 | peaks[pk].bw = (int)peaks[pk].bw1; |
908 | 0 | peaks[pk].bp1 += peaks[pk].bp_inc; |
909 | 0 | peaks[pk].bp = (int)peaks[pk].bp1; |
910 | 0 | peaks[pk].ap1 += peaks[pk].ap_inc; |
911 | 0 | peaks[pk].ap = (int)peaks[pk].ap1; |
912 | 0 | } |
913 | | |
914 | | // advance other parameters |
915 | 0 | for (ix = 0; ix < N_KLATTP; ix++) { |
916 | 0 | klattp1[ix] += klattp_inc[ix]; |
917 | 0 | klattp[ix] = (int)klattp1[ix]; |
918 | 0 | } |
919 | |
|
920 | 0 | for (ix = 0; ix <= 6; ix++) { |
921 | 0 | kt_frame.Fhz_next[ix] = peaks[ix].freq; |
922 | 0 | if (ix < 4) |
923 | 0 | kt_frame.Bhz_next[ix] = peaks[ix].bw; |
924 | 0 | } |
925 | | |
926 | | // advance the pitch |
927 | 0 | wdata->pitch_ix += wdata->pitch_inc; |
928 | 0 | if ((ix = wdata->pitch_ix>>8) > 127) ix = 127; |
929 | 0 | x = wdata->pitch_env[ix] * wdata->pitch_range; |
930 | 0 | wdata->pitch = (x>>8) + wdata->pitch_base; |
931 | |
|
932 | 0 | kt_globals.nspfr = (nsamples - sample_count); |
933 | 0 | if (kt_globals.nspfr > STEPSIZE) |
934 | 0 | kt_globals.nspfr = STEPSIZE; |
935 | |
|
936 | 0 | frame_init(&kt_frame); // get parameters for next frame of speech |
937 | |
|
938 | 0 | if (parwave(&kt_frame, wdata) == 1) |
939 | 0 | return 1; // output buffer is full |
940 | 0 | } |
941 | | |
942 | 0 | if (end_wave > 0) { |
943 | 0 | fade = 64; // not followed by formant synthesis |
944 | | |
945 | | // fade out to avoid a click |
946 | 0 | kt_globals.fadeout = fade; |
947 | 0 | end_wave = 0; |
948 | 0 | sample_count -= fade; |
949 | 0 | kt_globals.nspfr = fade; |
950 | 0 | if (parwave(&kt_frame, wdata) == 1) |
951 | 0 | return 1; // output buffer is full |
952 | 0 | } |
953 | | |
954 | 0 | return 0; |
955 | 0 | } |
956 | | |
957 | | static void SetSynth_Klatt(int length, frame_t *fr1, frame_t *fr2, voice_t *wvoice, int control) |
958 | 0 | { |
959 | 0 | int ix; |
960 | 0 | double next; |
961 | 0 | int qix; |
962 | 0 | int cmd; |
963 | 0 | frame_t *fr3; |
964 | 0 | static frame_t prev_fr; |
965 | |
|
966 | 0 | if (wvoice != NULL) { |
967 | 0 | if ((wvoice->klattv[0] > 0) && (wvoice->klattv[0] <= 5 )) { |
968 | 0 | kt_globals.glsource = wvoice->klattv[0]; |
969 | 0 | kt_globals.scale_wav = scale_wav_tab[kt_globals.glsource]; |
970 | 0 | } |
971 | 0 | kt_globals.f0_flutter = wvoice->flutter/32; |
972 | 0 | } |
973 | |
|
974 | 0 | end_wave = 0; |
975 | 0 | if (control & 2) |
976 | 0 | end_wave = 1; // fadeout at the end |
977 | 0 | if (control & 1) { |
978 | 0 | end_wave = 1; |
979 | 0 | for (qix = wcmdq_head+1;; qix++) { |
980 | 0 | if (qix >= N_WCMDQ) qix = 0; |
981 | 0 | if (qix == wcmdq_tail) break; |
982 | | |
983 | 0 | cmd = wcmdq[qix][0]; |
984 | 0 | if (cmd == WCMD_KLATT) { |
985 | 0 | end_wave = 0; // next wave generation is from another spectrum |
986 | |
|
987 | 0 | fr3 = (frame_t *)wcmdq[qix][2]; |
988 | 0 | for (ix = 1; ix < 6; ix++) { |
989 | 0 | if (fr3->ffreq[ix] != fr2->ffreq[ix]) { |
990 | | // there is a discontinuity in formants |
991 | 0 | end_wave = 2; |
992 | 0 | break; |
993 | 0 | } |
994 | 0 | } |
995 | 0 | break; |
996 | 0 | } |
997 | 0 | if ((cmd == WCMD_WAVE) || (cmd == WCMD_PAUSE)) |
998 | 0 | break; // next is not from spectrum, so continue until end of wave cycle |
999 | 0 | } |
1000 | |
|
1001 | 0 | for (ix = 1; ix < 6; ix++) { |
1002 | 0 | if (prev_fr.ffreq[ix] != fr1->ffreq[ix]) { |
1003 | | // Discontinuity in formants. |
1004 | | // end_wave was set in SetSynth_Klatt() to fade out the previous frame |
1005 | 0 | KlattReset(0); |
1006 | 0 | break; |
1007 | 0 | } |
1008 | 0 | } |
1009 | 0 | memcpy(&prev_fr, fr2, sizeof(prev_fr)); |
1010 | 0 | } |
1011 | |
|
1012 | 0 | for (ix = 0; ix < N_KLATTP; ix++) { |
1013 | 0 | if ((ix >= 5) || ((fr1->frflags & FRFLAG_KLATT) == 0)) { |
1014 | 0 | klattp1[ix] = klattp[ix] = 0; |
1015 | 0 | klattp_inc[ix] = 0; |
1016 | 0 | } else { |
1017 | 0 | klattp1[ix] = klattp[ix] = fr1->klattp[ix]; |
1018 | 0 | klattp_inc[ix] = (double)((fr2->klattp[ix] - klattp[ix]) * STEPSIZE)/length; |
1019 | 0 | } |
1020 | 0 | } |
1021 | |
|
1022 | 0 | nsamples = length; |
1023 | |
|
1024 | 0 | for (ix = 1; ix < 6; ix++) { |
1025 | 0 | peaks[ix].freq1 = (fr1->ffreq[ix] * wvoice->freq[ix] / 256.0) + wvoice->freqadd[ix]; |
1026 | 0 | peaks[ix].freq = (int)peaks[ix].freq1; |
1027 | 0 | next = (fr2->ffreq[ix] * wvoice->freq[ix] / 256.0) + wvoice->freqadd[ix]; |
1028 | 0 | peaks[ix].freq_inc = ((next - peaks[ix].freq1) * STEPSIZE) / length; |
1029 | |
|
1030 | 0 | if (ix < 4) { |
1031 | | // klatt bandwidth for f1, f2, f3 (others are fixed) |
1032 | 0 | peaks[ix].bw1 = fr1->bw[ix] * 2 * (wvoice->width[ix] / 256.0); |
1033 | 0 | peaks[ix].bw = (int)peaks[ix].bw1; |
1034 | 0 | next = fr2->bw[ix] * 2; |
1035 | 0 | peaks[ix].bw_inc = ((next - peaks[ix].bw1) * STEPSIZE) / length; |
1036 | 0 | } |
1037 | 0 | } |
1038 | | |
1039 | | // nasal zero frequency |
1040 | 0 | peaks[0].freq1 = fr1->klattp[KLATT_FNZ] * 2; |
1041 | 0 | if (peaks[0].freq1 == 0) |
1042 | 0 | peaks[0].freq1 = kt_frame.Fhz[F_NP]; // if no nasal zero, set it to same freq as nasal pole |
1043 | |
|
1044 | 0 | peaks[0].freq = (int)peaks[0].freq1; |
1045 | 0 | next = fr2->klattp[KLATT_FNZ] * 2; |
1046 | 0 | if (next == 0) |
1047 | 0 | next = kt_frame.Fhz[F_NP]; |
1048 | |
|
1049 | 0 | peaks[0].freq_inc = ((next - peaks[0].freq1) * STEPSIZE) / length; |
1050 | |
|
1051 | 0 | peaks[0].bw1 = 89; |
1052 | 0 | peaks[0].bw = 89; |
1053 | 0 | peaks[0].bw_inc = 0; |
1054 | |
|
1055 | 0 | if (fr1->frflags & FRFLAG_KLATT) { |
1056 | | // the frame contains additional parameters for parallel resonators |
1057 | 0 | for (ix = 1; ix < 7; ix++) { |
1058 | 0 | peaks[ix].bp1 = fr1->klatt_bp[ix] * 4; // parallel bandwidth |
1059 | 0 | peaks[ix].bp = (int)peaks[ix].bp1; |
1060 | 0 | next = fr2->klatt_bp[ix] * 4; |
1061 | 0 | peaks[ix].bp_inc = ((next - peaks[ix].bp1) * STEPSIZE) / length; |
1062 | |
|
1063 | 0 | peaks[ix].ap1 = fr1->klatt_ap[ix]; // parallal amplitude |
1064 | 0 | peaks[ix].ap = (int)peaks[ix].ap1; |
1065 | 0 | next = fr2->klatt_ap[ix]; |
1066 | 0 | peaks[ix].ap_inc = ((next - peaks[ix].ap1) * STEPSIZE) / length; |
1067 | 0 | } |
1068 | 0 | } |
1069 | 0 | } |
1070 | | |
1071 | | void KlattInit(void) |
1072 | 1 | { |
1073 | | |
1074 | 1 | static const short formant_hz[10] = { 280, 688, 1064, 2806, 3260, 3700, 6500, 7000, 8000, 280 }; |
1075 | 1 | static const short bandwidth[10] = { 89, 160, 70, 160, 200, 200, 500, 500, 500, 89 }; |
1076 | 1 | static const short parallel_amp[10] = { 0, 59, 59, 59, 59, 59, 59, 0, 0, 0 }; |
1077 | 1 | static const short parallel_bw[10] = { 59, 59, 89, 149, 200, 200, 500, 0, 0, 0 }; |
1078 | | |
1079 | 1 | int ix; |
1080 | | |
1081 | | #if USE_SPEECHPLAYER |
1082 | | KlattInitSP(); |
1083 | | #endif |
1084 | | |
1085 | 1 | sample_count = 0; |
1086 | | |
1087 | 1 | kt_globals.synthesis_model = CASCADE_PARALLEL; |
1088 | 1 | kt_globals.samrate = 22050; |
1089 | | |
1090 | 1 | kt_globals.glsource = IMPULSIVE; |
1091 | 1 | kt_globals.scale_wav = scale_wav_tab[kt_globals.glsource]; |
1092 | 1 | kt_globals.natural_samples = natural_samples; |
1093 | 1 | kt_globals.num_samples = NUMBER_OF_SAMPLES; |
1094 | 1 | kt_globals.sample_factor = 3.0; |
1095 | 1 | kt_globals.nspfr = (kt_globals.samrate * 10) / 1000; |
1096 | 1 | kt_globals.outsl = 0; |
1097 | 1 | kt_globals.f0_flutter = 20; |
1098 | | |
1099 | 1 | KlattReset(2); |
1100 | | |
1101 | | // set default values for frame parameters |
1102 | 11 | for (ix = 0; ix <= 9; ix++) { |
1103 | 10 | kt_frame.Fhz[ix] = formant_hz[ix]; |
1104 | 10 | kt_frame.Bhz[ix] = bandwidth[ix]; |
1105 | 10 | kt_frame.Ap[ix] = parallel_amp[ix]; |
1106 | 10 | kt_frame.Bphz[ix] = parallel_bw[ix]; |
1107 | 10 | } |
1108 | 1 | kt_frame.Bhz_next[F_NZ] = bandwidth[F_NZ]; |
1109 | | |
1110 | 1 | kt_frame.F0hz10 = 1000; |
1111 | 1 | kt_frame.AVdb = 59; |
1112 | 1 | kt_frame.ASP = 0; |
1113 | 1 | kt_frame.Kopen = 40; |
1114 | 1 | kt_frame.Aturb = 0; |
1115 | 1 | kt_frame.TLTdb = 0; |
1116 | 1 | kt_frame.AF = 50; |
1117 | 1 | kt_frame.Kskew = 0; |
1118 | 1 | kt_frame.AB = 0; |
1119 | 1 | kt_frame.AVpdb = 0; |
1120 | 1 | kt_frame.Gain0 = 62; |
1121 | 1 | } |