1
2 import re
3 import math
4
5 import numpy
6
7 from gmisclib import die
8 from gmisclib import cache as CC
9 from gmisclib import gpkmisc
10 from gmisclib import g_encode
11 from gmisclib import erb_scale
12 from gmisclib import Numeric_gpk as NG
13 from gpk_voicing import voice_misc
14 from gpk_voicing import cached_ps as PSC
15 from gpk_voicing import percep_spec as PS
16 try:
17 from gpk_voicing import pseudo_dur as PD
18 except ImportError:
19 PD = None
20
# Plotting hook: pdur() only draws its debug figure when this name is truthy,
# so plotting is off unless something rebinds it.
21 pylab = None
22
23
# Default value of the `db` argument of normalize_neural().
24 DB = math.sqrt(0.5)
# NOTE(review): NSV is not referenced in the visible part of this file; it looks
# like a default for the `Nsv` arguments below -- confirm against callers.
25 NSV = 0.75
26
# Base constant that pdur() multiplies by its `Cfac` argument.
27 C = 10.0
28
29
33
34
35
36
37
38
39
40
41
42
54
55
56
57
58
59
68
69
# NOTE(review): the `def` line of this function is missing from this listing.
# The body reads `n` and `i`, which matches the burst_win(BKGWIDTH, 2) call in
# burst() -- confirm the name and signature against the real file.
# Builds two length-n kernels: a unit impulse at index i minus the normalized
# win(n) window, and the normalized window itself.
71 if not n > 0:
# Python 2 `raise Class, value` syntax; SillyWidthException is defined outside this view.
72 raise SillyWidthException, n
# NOTE(review): numpy.float is a removed alias in NumPy >= 1.24 (plain `float` is equivalent).
73 tmp = numpy.zeros((n,), numpy.float)
74 tmp[i] = 1.0
75 neg = win(n)
# Normalize the background window to unit sum, so both kernels are on the same scale.
76 nneg = neg/neg.sum()
77 return (tmp - nneg, nneg)
78
79
# NOTE(review): the `def` line of this function is missing from this listing;
# the rest of the file calls it as entropy(cs) with a window array.
# Returns the Shannon entropy (natural log) of the weights `w` after scaling
# them to sum to one.
# Strictly positive weights are required so that log(p) below stays finite.
81 assert numpy.greater(w, 0.0).all()
82 p = w/numpy.sum(w)
83 return -numpy.sum(p*numpy.log(p))
84
# NOTE(review): the `def` line of this function is missing from this listing;
# the body reads `signal` and `kernel`, and the rest of the file calls it as
# convolve(signal_array, kernel_array).
86 """Returns something the length of the signal, by zero padding."""
# Signal longer than the kernel: numpy.convolve with mode 1 already returns
# an array of the signal's length (1 is presumably the legacy integer
# spelling of mode='same' -- confirm for the numpy version in use).
87 if signal.shape[0] > kernel.shape[0]:
88 return numpy.convolve(signal, kernel, 1)
# Otherwise append m zeros so the padded signal is one sample longer than the
# kernel, convolve, and keep only the first len(signal) samples.
89 m = 1+kernel.shape[0]-signal.shape[0]
# NOTE(review): numpy.float is a removed alias in NumPy >= 1.24.
90 tmp = numpy.concatenate((signal, numpy.zeros((m,), numpy.float)))
# NOTE(review): the message reads "from %d to %d" but the second value is m,
# the number of zeros appended, not the padded length (signal.shape[0] + m).
91 die.info('Narrow signal: padding from %d to %d' % (signal.shape[0], m))
92 return numpy.convolve(tmp, kernel, 1)[:signal.shape[0]]
93
94
def vowel(width, ectrs, neural, neural_now, Nsv,
          formant_low=None, formant_high=None):
    """Build one smoothed, normalized 'vowel' feature per selected band.

    A channel is selected when its descriptor in ``ectrs`` has ``'type'``
    equal to ``'band'`` and its ``'erb'`` lies strictly between
    ``formant_low`` and ``formant_high``.  The matching row of ``neural`` is
    convolved with ``win(width)``, scaled by ``VFAC`` over the window sum,
    and divided by a normalization built from the smoothed ``neural_now``.

    Returns ``(features, descriptors)``: two parallel lists holding the
    feature arrays and a copy of each selected descriptor, extended with
    'type', 'width', 'Kentropy', 'Fentropy', 't_symmetry', 'a_scaling'
    and 'id'.

    NOTE(review): with the default ``None`` bounds the chained comparison
    selects nothing under Python 2 ordering rules and raises TypeError on
    Python 3, so callers presumably always pass both bounds -- confirm.
    """
    VFAC = 0.75
    kernel = win(width)
    ksum = kernel.sum()

    # Normalization: the smoothed overall activity combined (via hypot) with a
    # constant floor proportional to Nsv, so quiet stretches are not blown up.
    smoothed = convolve(neural_now, kernel/ksum)
    floor = Nsv * numpy.square(smoothed).sum()/numpy.sum(smoothed)
    norm = numpy.hypot(smoothed, floor)

    features = []
    descriptors = []
    for (idx, chan) in enumerate(ectrs):
        if chan['type'] != 'band':
            continue
        if not (formant_low < chan['erb'] < formant_high):
            continue
        feature = (VFAC/ksum)*convolve(neural[idx], kernel)/norm

        info = chan.copy()
        info['type'] = 'vowel'
        info['width'] = width
        info['Kentropy'] = entropy(kernel)
        info['Fentropy'] = 0.0
        info['t_symmetry'] = 1
        info['a_scaling'] = 1
        info['id'] = '%s:%d:%.1f' % (info['type'], width, chan['erb'])

        features.append(feature)
        descriptors.append(info)

    return (features, descriptors)
128
129
# NOTE(review): the `def` line of this function is missing from this listing;
# the body reads width, ectrs, neural and Nsv, and labels its output
# 'spectral_entropy'.
131 """This is inspired (but only loosely) by
132 "Robust Entropy-based Endpoint Detection for Speech Recognition
133 in Noisy Environments." by Jia-lin Shen and Jeih-weih Hung
134 and Lin-shan Lee, http://www.ee.columbia.edu/~dpwe/papers/ShenHL98-endpoint.pdf
135 International Conference on Spoken Language Processing, 1998.
136 """
# Nsv is only checked here; nothing else in the visible body uses it.
137 assert Nsv > 0.0
138 SEF = 1.0
139 EPS = 1e-6
140 cs = win(width)
141 assert numpy.greater(cs, 0.0).all()
142
# Pass 1: smooth every 'band' channel with the window and accumulate the
# per-frame total over bands in nns.
143 nnt = []
# NOTE(review): numpy.float is a removed alias in NumPy >= 1.24.
144 nns = numpy.zeros(neural.shape[1], numpy.float)
145 for (i, e) in enumerate(ectrs):
146 if e['type']=='band':
147 assert numpy.greater_equal(neural[i], 0.0).all()
148 tmp = convolve(neural[i], cs)
149 nnt.append(tmp)
150 numpy.add(nns, tmp, nns)
151
# Pass 2: turn each smoothed band into its share of the total (in place), add a
# small EPS*nns offset so log() never sees an exact zero, and sum p*log(p).
# The accumulator starts at log(4.0) rather than zero.
152 np = len(nnt)
153 ent_sum = numpy.zeros(neural.shape[1], numpy.float) + math.log(4.0)
154 for tmp in nnt:
155 numpy.divide(tmp, nns, tmp)
156 numpy.add(tmp, EPS*nns, tmp)
157 etmp = tmp*numpy.log(tmp)
158 numpy.add(ent_sum, etmp, ent_sum)
# Flip the sign (scaled by SEF) so the result has the usual -sum(p log p) orientation.
159 numpy.multiply(ent_sum, -SEF, ent_sum)
160
161
# This check runs only after both loops; with no 'band' channels the loops
# above are simply skipped and the failure surfaces here.
162 assert np > 0
163 dtmp = {'type': 'spectral_entropy',
164 'width': width,
165 'Kentropy': entropy(cs),
166 'Fentropy': math.log(np),
167 'id': 'Sentropy:%d' % width,
168 't_symmetry': 1,
169 'a_scaling': 0
170 }
# A single feature array and a single descriptor, each wrapped in a list.
171 return ([ent_sum], [dtmp])
172
173
# NOTE(review): the `def` line of this function is missing from this listing;
# the body reads width, ectrs, neural and Nsv, and labels its output
# 'space_time_entropy'.
# Nsv is only checked here; nothing else in the visible body uses it.
175 assert Nsv > 0.0
176 SEF = 1.0
177 EPS = 1e-6
# Unlike the spectral-entropy code above, the window is not asserted positive here.
178 cs = win(width)
179
# Collect the raw (unsmoothed) 'band' rows and their per-frame sum.
180 nnt = []
# NOTE(review): numpy.float is a removed alias in NumPy >= 1.24.
181 nns = numpy.zeros(neural.shape[1], numpy.float)
182 for (i, e) in enumerate(ectrs):
183 if e['type']=='band':
184 assert numpy.greater_equal(neural[i], 0.0).all()
185 nnt.append(neural[i])
186 numpy.add(nns, neural[i], nns)
187
188 np = len(nnt)
# Only the band total is smoothed before it is used as the divisor.
189 nns = convolve(nns, cs)
190 ent_sum = numpy.zeros(neural.shape[1], numpy.float) + math.log(3.0*np)
191 for tmp in nnt:
# NOTE(review): tmp is the very object stored from neural[i]; if `neural` is a
# 2-D numpy array these in-place operations overwrite the caller's data --
# confirm that this is intended.
192 numpy.divide(tmp, nns, tmp)
193 numpy.add(tmp, EPS*nns, tmp)
# Here the p*log(p) term itself is smoothed with the window before accumulation.
194 etmp = convolve(tmp*numpy.log(tmp), cs)
195 numpy.add(ent_sum, etmp, ent_sum)
196 numpy.multiply(ent_sum, -SEF, ent_sum)
197
198
# Checked only after the computation; math.log(3.0*np) above already fails
# with ValueError when there are no 'band' channels.
199 assert np > 0
200 dtmp = {'type': 'space_time_entropy',
201 'width': width,
202 'Kentropy': entropy(cs),
203 'Fentropy': math.log(np),
204 'id': 'STentropy:%d' % width,
205 't_symmetry': 1,
206 'a_scaling': 0
207 }
# A single feature array and a single descriptor, each wrapped in a list.
208 return ([ent_sum], [dtmp])
209
210
211
def haspitch(width, ectrs, neural, neural_now, Nsv):
    """Smooth and normalize every channel whose type starts with 'haspitch'.

    Each matching row of ``neural`` is convolved with ``win(width)``, scaled
    by a gain looked up from the channel's ``'variant'`` (divided by the
    window sum), and divided by a normalization built from the smoothed
    ``neural_now``.

    Returns ``(features, descriptors)``: parallel lists of feature arrays
    and descriptor copies.  Each copy gets the width appended to its 'id'
    and gains 'Kentropy', 'width', 't_symmetry' and 'a_scaling'; the input
    descriptor must already carry 'Fentropy'.
    """
    HFAC = {'high': 20.0, 'all': 10.0, 1: 10.0}
    kernel = win(width)
    ksum = kernel.sum()
    smoothed = convolve(neural_now, kernel)/kernel.sum()
    floor = Nsv * numpy.square(smoothed).sum()/smoothed.sum()
    norm = numpy.hypot(smoothed, floor)

    features = []
    descriptors = []
    for (idx, chan) in enumerate(ectrs):
        if not chan['type'].startswith('haspitch'):
            continue
        gain = HFAC[chan['variant']]/ksum
        features.append(gain*convolve(neural[idx], kernel)/norm)

        info = chan.copy()
        info['id'] = '%s:%d' % (info['id'], width)
        info['Kentropy'] = entropy(kernel)
        assert 'Fentropy' in info
        info['width'] = width
        info['t_symmetry'] = 1
        info['a_scaling'] = 1
        descriptors.append(info)

    return (features, descriptors)
240
241
242
def peakiness(width, ectrs, neural, neural_now, Nsv):
    """Smooth and normalize every channel whose type starts with 'peakalign'.

    Each matching row of ``neural`` is convolved with ``win(width)``, scaled
    by ``HFAC`` over the window sum, and divided by a normalization built
    from the smoothed ``neural_now``.

    Returns ``(features, descriptors)``: parallel lists of feature arrays
    and descriptor copies.  Each copy gets the width appended to its 'id'
    and gains 'Kentropy', 'width', 't_symmetry' and 'a_scaling'; the input
    descriptor must already carry 'Fentropy'.
    """
    HFAC = 10
    window = win(width)
    wsum = window.sum()
    level = convolve(neural_now, window)/wsum
    norm = numpy.hypot(level, Nsv * numpy.square(level).sum()/numpy.sum(level))

    out_arrays = []
    out_descr = []
    for (k, ch) in enumerate(ectrs):
        if not ch['type'].startswith('peakalign'):
            continue
        out_arrays.append((HFAC/wsum)*convolve(neural[k], window)/norm)

        d = ch.copy()
        d['id'] = '%s:%d' % (d['id'], width)
        d['Kentropy'] = entropy(window)
        assert 'Fentropy' in d
        d.update([('width', width), ('t_symmetry', 1), ('a_scaling', 1)])
        out_descr.append(d)

    return (out_arrays, out_descr)
271
272
273
274
275
def pdur(ectrs, neural, neural_now, Dt, Cfac=1.0):
    """Compute a log pseudo-duration feature, one value per frame.

    For every frame index ``t``, ``PD.pdur_guts`` is called twice on the
    transposed ``neural`` array, once with direction -1 and once with +1;
    the first element of each result is summed into ``out[t]``.

    ectrs      -- channel descriptors; only the number of 'band' entries is
                  used (it must be positive).
    neural     -- 2-D array, transposed before being handed to pdur_guts.
    neural_now -- 1-D array; sets the number of frames and the scale that
                  the constant ``C * Cfac`` is divided by.
    Dt         -- passed straight through to pdur_guts.
    Cfac       -- multiplier applied to the module constant ``C``.

    Returns ``([log(out/TYPICAL)], [descriptor])``, or ``([], [])`` when the
    optional ``pseudo_dur`` module could not be imported.
    """
    if PD is None:
        return ([], [])
    TYPICAL = 0.100
    c = C * Cfac
    norm2 = (numpy.square(neural_now).sum()/numpy.sum(neural_now))**2

    # Loop invariants: compute them once, not twice per frame.
    frames = neural.transpose()
    scaled_c = c/norm2

    no = neural_now.shape[0]
    # Builtin float, not numpy.float: that alias was removed in NumPy 1.24.
    out = numpy.zeros((no,), float)
    for t in range(no):
        len_minus, _ = PD.pdur_guts(frames, t, -1, Dt, scaled_c)
        len_plus, _ = PD.pdur_guts(frames, t, 1, Dt, scaled_c)
        out[t] = len_minus + len_plus

    if pylab:
        # Debug plot; only active when the module-level `pylab` hook is set.
        pylab.figure()
        pylab.title('pdur')
        pylab.plot(out)
        pylab.show()

    nbands = sum(1 for e in ectrs if e['type'] == 'band')
    assert nbands > 0
    typical = NG.N_median(out)
    descr = [ {'id': 'pseudoduration:%f' % c,
               'Kentropy': NG.N_median(numpy.log(out)),
               'Fentropy': math.log(nbands),
               'width': typical,
               't_symmetry': 1, 'a_scaling': 0
               } ]
    return ([numpy.log(out/TYPICAL)], descr)
306
307
def roughness(width, ectrs, neural, neural_now, Nsv):
    """Smooth and normalize every channel whose type starts with 'roughness'.

    Each matching row of ``neural`` is convolved with ``win(width)``, scaled
    by a gain looked up from the channel's ``'variant'`` (divided by the
    window sum), and divided by a normalization built from the smoothed
    ``neural_now``.

    Returns ``(features, descriptors)``: parallel lists of feature arrays
    and descriptor copies.  Each copy gets the width appended to its 'id'
    and gains 'Kentropy', 'width', 't_symmetry' and 'a_scaling'; the input
    descriptor must already carry 'Fentropy'.
    """
    HFAC = {1: 4.0}
    window = win(width)
    wsum = window.sum()
    level = convolve(neural_now, window)/wsum
    floor = Nsv * numpy.square(level).sum()/level.sum()
    norm = numpy.hypot(level, floor)

    arrays = []
    infos = []
    for (row, chan) in enumerate(ectrs):
        if not chan['type'].startswith('roughness'):
            continue
        scale = HFAC[chan['variant']]/wsum
        arrays.append(scale*convolve(neural[row], window)/norm)

        info = chan.copy()
        info['id'] = '%s:%d' % (info['id'], width)
        info['Kentropy'] = entropy(window)
        assert 'Fentropy' in info
        info['width'] = width
        info['t_symmetry'] = 1
        info['a_scaling'] = 1
        infos.append(info)

    return (arrays, infos)

# Alternative public name for the same function.
dissonance = roughness
338
339
340
def vowel_edge(width, ectrs, neural, neural_now, Nsv, do_abs=False,
               formant_low=None, formant_high=None):
    """Build one normalized edge feature per selected band.

    A channel is selected when its descriptor has ``'type'`` equal to
    ``'band'`` and its ``'erb'`` lies strictly between ``formant_low`` and
    ``formant_high``.  The matching row of ``neural`` is convolved with the
    first kernel returned by ``edgepair_win(width)``; the second kernel is
    used only to smooth ``neural_now`` for the normalization.

    With ``do_abs`` true the absolute value of the feature is returned and
    the descriptor type becomes 'vowel |edge|' instead of 'vowel edge'.

    Returns ``(features, descriptors)`` as two parallel lists.

    NOTE(review): with the default ``None`` bounds the chained comparison
    selects nothing under Python 2 ordering rules and raises TypeError on
    Python 3 -- callers presumably always pass both bounds.
    NOTE(review): 't_symmetry' stays -1 even when do_abs is set, whereas
    fricative_edge() switches it to 1 in that case -- confirm which is meant.
    """
    VEfac = 0.7
    edge_kernel, smooth_kernel = edgepair_win(width)
    ssum = smooth_kernel.sum()
    level = convolve(neural_now, smooth_kernel)/ssum
    floor = Nsv * numpy.square(level).sum()/level.sum()
    norm = numpy.hypot(level, floor)

    kind = 'vowel |edge|' if do_abs else 'vowel edge'

    features = []
    descriptors = []
    for (idx, chan) in enumerate(ectrs):
        if chan['type'] != 'band':
            continue
        if not (formant_low < chan['erb'] < formant_high):
            continue
        response = VEfac * convolve(neural[idx], edge_kernel)/norm

        info = chan.copy()
        info['width'] = 2*width
        info['Kentropy'] = entropy(smooth_kernel)
        info['Fentropy'] = 0.0
        info['t_symmetry'] = -1
        info['a_scaling'] = 1
        features.append(numpy.absolute(response) if do_abs else response)
        info['type'] = kind
        info['id'] = '%s:%d:%.1f' % (info['type'], width, chan['erb'])
        descriptors.append(info)

    return (features, descriptors)
373
374
def fricative(width, ectrs, neural, neural_now, Nsv):
    """Build one smoothed, normalized feature per fixed frequency range.

    For each of five hard-coded ranges (70-700, 700-1500, 1500-2500,
    2500-4000, 4000-6000, converted with ``erb_scale.f_to_erb``) the rows of
    ``neural`` whose descriptor is a 'band' with 'erb' strictly inside the
    range are summed, convolved with ``win(width)``, scaled, and divided by
    the normalization times the number of bands summed.

    Returns ``(features, descriptors)``: two parallel lists with one entry
    per range.  Each descriptor lists the 'erbs' and 'fcs' of the bands that
    went into the feature.

    Raises ValueError when a range contains no band.  (The original code
    divided by zero there and then failed inside math.log(0) with the
    uninformative "math domain error".)
    """
    CSSE = 0.7
    N = neural[0].shape[0]
    cs = win(width)
    css = cs.sum()
    nns = convolve(neural_now, cs)/css
    norm = numpy.hypot(nns, Nsv * numpy.square(nns).sum()/nns.sum())
    kentropy = entropy(cs)

    o = []
    descr = []
    # (f_low, f_high, gain); the upper bound is not called `re`, so that it
    # does not shadow the imported regex module.
    for (f_low, f_high, fac) in [(70.0, 700.0, 0.6), (700.0, 1500.0, 0.6),
                                 (1500.0, 2500.0, 0.9), (2500.0, 4000.0, 1.1),
                                 (4000.0, 6000.0, 1.3)
                                 ]:
        # Builtin float, not numpy.float: that alias was removed in NumPy 1.24.
        tsum = numpy.zeros((N,), float)
        elow = erb_scale.f_to_erb(f_low)
        ehigh = erb_scale.f_to_erb(f_high)
        included = []
        for (i, e) in enumerate(ectrs):
            if e['type']=='band' and elow < e['erb'] < ehigh:
                numpy.add(tsum, neural[i], tsum)
                included.append(e)
        if not included:
            raise ValueError('fricative: no band with erb between %.2f and %.2f'
                             ' (frequency range %.1f-%.1f)'
                             % (elow, ehigh, f_low, f_high))
        nq = len(included)
        tmp = (fac/css**CSSE) * convolve(tsum, cs)/(norm*nq)

        included.sort(key=lambda x:x['erb'])
        dtmp = {'width': width,
                'type': 'fricative', 't_symmetry': 1,
                'erbs': [e['erb'] for e in included],
                'fcs' : [e['fc'] for e in included],
                'Kentropy': kentropy,
                'Fentropy': -math.log(nq),
                'a_scaling': 1
                }
        o.append(tmp)
        dtmp['id'] = '%s:%d:%.1f-%.1f' % (dtmp['type'], width, included[0]['erb'], included[-1]['erb'])
        descr.append( dtmp )

    return (o, descr)
417
418
# NOTE(review): the `def` line of this function is missing from this listing;
# the body reads ectrs, neural, neural_now and Nsv, and labels its output
# 'prominence'.
420 WIDTH = 20
421 N = neural[0].shape[0]
422 ce, cs = edgepair_win(WIDTH)
423 css = cs.sum()
# NOTE(review): unlike vowel_edge() and fricative_edge(), the smoothed level is
# not divided by css here -- confirm whether that is deliberate.
424 nns = convolve(neural_now, cs)
425 norm = numpy.hypot(nns, Nsv * numpy.square(nns).sum()/nns.sum())
426
427
428 o = []
429 descr = []
# NOTE(review): `included` is created once, outside the range loop, so the
# 'erbs', 'fcs', 'Fentropy' and 'id' of every later range also cover the bands
# of all earlier ranges; fricative() resets it per range. Likely a bug -- confirm.
430 included = []
# One feature per (low, high, gain) frequency range.
431 for (rs, re, fac) in [ (70.0, 600.0, 0.22), (600.0, 2000.0, 0.25),
432 (2000.0, 3000.0, 0.32), (3000.0, 6000.0, 0.35) ]:
# NOTE(review): numpy.float is a removed alias in NumPy >= 1.24; the loop
# variable `re` also shadows the imported regex module inside this function.
433 tsum = numpy.zeros((N,), numpy.float)
434 elow = erb_scale.f_to_erb(rs)
435 ehigh = erb_scale.f_to_erb(re)
# Sum every 'band' row whose erb lies strictly inside the range.
436 for (i, e) in enumerate(ectrs):
437 if e['type']=='band' and elow < e['erb'] < ehigh:
438 numpy.add(tsum, neural[i], tsum)
439 included.append(e)
# The summed bands are filtered with the first (edge) kernel, not the smoothing one.
440 tmp = (fac/css) * convolve(tsum, ce)/norm
441
442
443 included.sort(key=lambda x:x['erb'])
444 dtmp = {'type': 'prominence', 'width': WIDTH,
445 'erbs': [e['erb'] for e in included],
446 'fcs' : [e['fc'] for e in included],
447 'Kentropy': entropy(cs),
448 'Fentropy': -math.log(len(included)),
449 'id': 'prominence:%d:%.1f-%.1f' % (WIDTH, included[0]['erb'], included[-1]['erb']),
450 't_symmetry': 1, 'a_scaling': 1
451 }
452 o.append( tmp )
453 descr.append( dtmp )
454
# Parallel lists: one feature array and one descriptor per frequency range.
455 return (o, descr)
456
457
458
def fricative_edge(width, ectrs, neural, neural_now, Nsv, do_abs=False, version=1):
    """Build one normalized edge feature per fixed frequency range.

    ``version`` selects one of two hard-coded range tables (1: five ranges
    from 70 to 6000; 2: six ranges from 100 to 5000), converted with
    ``erb_scale.f_to_erb``.  For each range the rows of ``neural`` whose
    descriptor is a 'band' with 'erb' strictly inside the range are summed,
    convolved with the first kernel from ``edgepair_win(width)``, scaled,
    and divided by the normalization times the number of bands summed.

    With ``do_abs`` true the absolute value is returned, the descriptor type
    is 'fricative |edge|' and 't_symmetry' is 1; otherwise the type is
    'fricative edge' and 't_symmetry' is -1.

    Returns ``(features, descriptors)`` as two parallel lists.

    Raises ValueError for an unknown ``version`` (the original left the
    range table unbound and failed with a NameError) and when a range
    contains no band (the original divided by zero and then failed inside
    math.log(0)).
    """
    CSSE = 1.6
    N = neural[0].shape[0]
    ce, cs = edgepair_win(width)
    css = cs.sum()
    nns = convolve(neural_now, cs)/css
    norm = numpy.hypot(nns, Nsv * numpy.square(nns).sum()/nns.sum())
    kentropy = entropy(cs)

    o = []
    descr = []
    if version==1:
        flist = [(70.0, 700.0, 7), (700.0, 1500.0, 7),
                 (1500.0, 2500.0, 7), (2500.0, 4000.0, 7),
                 (4000.0, 6000.0, 7)]
    elif version==2:
        flist = [(100.0, 300.0, 7), (300.0, 550.0, 7), (550.0, 900.0, 7),
                 (900.0, 1400.0, 7),
                 (1400.0, 2800.0, 7), (2800.0, 5000.0, 7)]
    else:
        raise ValueError('fricative_edge: unknown version %r (expected 1 or 2)'
                         % (version,))

    # The upper bound is not called `re`, so that it does not shadow the
    # imported regex module.
    for (f_low, f_high, fac) in flist:
        # Builtin float, not numpy.float: that alias was removed in NumPy 1.24.
        tsum = numpy.zeros((N,), float)
        elow = erb_scale.f_to_erb(f_low)
        ehigh = erb_scale.f_to_erb(f_high)
        included = []
        for (i, e) in enumerate(ectrs):
            if e['type']=='band' and elow < e['erb'] < ehigh:
                numpy.add(tsum, neural[i], tsum)
                included.append(e)
        if not included:
            raise ValueError('fricative_edge: no band with erb between %.2f and %.2f'
                             ' (frequency range %.1f-%.1f)'
                             % (elow, ehigh, f_low, f_high))
        nq = len(included)
        tmp = (fac/css**CSSE) * convolve(tsum, ce)/(norm*nq)

        included.sort(key=lambda x:x['erb'])
        dtmp = {'width': width,
                'erbs': [e['erb'] for e in included],
                'fcs' : [e['fc'] for e in included],
                'Kentropy': kentropy,
                'Fentropy': -math.log(nq),
                'a_scaling': 1
                }
        if do_abs:
            o.append( numpy.absolute(tmp) )
            dtmp['type'] = 'fricative |edge|'
            dtmp['t_symmetry'] = 1
        else:
            o.append( tmp )
            dtmp['type'] = 'fricative edge'
            dtmp['t_symmetry'] = -1
        dtmp['id'] = '%s:%d:%.1f-%.1f' % (dtmp['type'], width, included[0]['erb'], included[-1]['erb'])
        descr.append( dtmp )

    return (o, descr)
512
513
def burst(ectrs, neural, neural_now, Nsv):
    """Build one normalized burst feature per fixed frequency range.

    The two kernels come from ``burst_win(BKGWIDTH, 2)``: the first filters
    the summed bands, the second smooths ``neural_now`` for the
    normalization.  For each of four hard-coded ranges (150-500, 500-1200,
    1200-2800, 2800-6000, converted with ``erb_scale.f_to_erb``) the rows of
    ``neural`` whose descriptor is a 'band' with 'erb' strictly inside the
    range are summed, filtered and normalized.

    Returns ``(features, descriptors)``: two parallel lists with one entry
    per range.

    Behaviour change from the original: the list of contributing bands is
    now rebuilt for every range.  The original created it once before the
    loop, so the 'erbs', 'fcs', 'Fentropy' and 'id' of each later range also
    covered the bands of all earlier ranges.  This changes the 'id' strings
    of every range after the first.  As a consequence, a range with no band
    now raises ValueError; the original raised the same error type (from
    math.log(0)) only when the first range was empty, and otherwise emitted
    an all-zero feature with a misleading descriptor.
    """
    BKGWIDTH = 8
    WIDTH = 1
    N = neural[0].shape[0]
    ce,cs = burst_win(BKGWIDTH, 2)
    nns = convolve(neural_now, cs)/cs.sum()
    norm = numpy.hypot(nns, Nsv * numpy.square(nns).sum()/nns.sum())
    kentropy = entropy(cs)

    o = []
    descr = []

    # The upper bound is not called `re`, so that it does not shadow the
    # imported regex module.
    for (f_low, f_high, fac) in [ (150.0, 500.0, 1.0), (500.0, 1200.0, 1.0),
                                  (1200.0, 2800.0, 1.0), (2800.0, 6000.0, 1.0) ]:
        # Builtin float, not numpy.float: that alias was removed in NumPy 1.24.
        tsum = numpy.zeros((N,), float)
        elow = erb_scale.f_to_erb(f_low)
        ehigh = erb_scale.f_to_erb(f_high)
        included = []       # reset per range so the descriptor matches the feature
        for (i, e) in enumerate(ectrs):
            if e['type']=='band' and elow < e['erb'] < ehigh:
                numpy.add(tsum, neural[i], tsum)
                included.append( e )
        if not included:
            raise ValueError('burst: no band with erb between %.2f and %.2f'
                             ' (frequency range %.1f-%.1f)'
                             % (elow, ehigh, f_low, f_high))
        tss = fac * convolve(tsum, ce)/norm

        included.sort(key=lambda x:x['erb'])
        o.append( tss )
        descr.append( {'type': 'burst', 'width': WIDTH,
                       'bkgwidth': BKGWIDTH,
                       'erbs': [e['erb'] for e in included],
                       'fcs' : [e['fc'] for e in included],
                       'Kentropy': kentropy,
                       'Fentropy': -math.log(len(included)),
                       'id': 'burst:%d:%.1f-%.1f'
                             % (WIDTH, included[0]['erb'], included[-1]['erb']),
                       't_symmetry': 1, 'a_scaling': 1
                       }
                     )

    return (o, descr)
555
556
557
594
595
def normalize_neural(data, dt, Dt, bmin=None, bmax=None, db=DB, do_mod=0, do_dissonance=False,
                     PlompBouman=False, cache_info=None,
                     norm_kernel=None, Nsv=None, t_kernel=None, f_kernel=None
                     ):
    """Compute a perceptual spectrum and normalize its band channels.

    The spectrum comes from ``PSC.perceptual_spec`` when ``cache_info`` is
    given and from ``PS.block_percep_spec`` otherwise.  Channels whose
    descriptor type is 'band' are sorted by 'erb', divided by a global level
    derived from their average, optionally smoothed along time
    (``t_kernel``) and across channels (``f_kernel``), and finally divided
    frame by frame by a normalization built from the band average convolved
    with ``norm_kernel``.  All other channels are returned unmodified.

    data        -- 1-D signal array.
    dt, Dt      -- passed to the spectrum routine; ``len(data)*dt`` must
                   agree with ``frames*Dt`` to within 10%.
    bmin, bmax, db, do_mod, do_dissonance, PlompBouman
                -- passed straight through to the spectrum routine.
    cache_info  -- optional ``CC.cache_info`` instance enabling the cached path.
    norm_kernel -- kernel for the per-frame normalization (required).
    Nsv         -- weight of the constant floor in the normalization (required).
    t_kernel    -- optional kernel convolved along each band (mode 'same').
    f_kernel    -- optional kernel convolved across bands per frame (mode 'same').

    Returns ``(t0, neural, ectrs, nneural, nectrs)``: the start value
    reported by the spectrum routine, the normalized band array with its
    descriptors, and the non-band array with its descriptors.

    Raises ValueError when ``norm_kernel`` or ``Nsv`` is left at None; the
    original only failed deep inside the normalization, with an
    AttributeError or TypeError, after the spectrum had been computed.
    """
    if norm_kernel is None or Nsv is None:
        raise ValueError('normalize_neural: norm_kernel and Nsv must both be supplied')

    if cache_info is not None:
        assert isinstance(cache_info, CC.cache_info)
        all_ectrs, all_ps, t0 = PSC.perceptual_spec(data, dt, Dt,
                                                    bmin=bmin, bmax=bmax, db=db,
                                                    do_mod=do_mod,
                                                    do_dissonance=do_dissonance,
                                                    PlompBouman=PlompBouman,
                                                    cache_info=cache_info
                                                    )
    else:
        all_ectrs, all_ps, t0 = PS.block_percep_spec(data, dt, Dt,
                                                     bmin=bmin, bmax=bmax, db=db,
                                                     do_mod=do_mod,
                                                     do_dissonance=do_dissonance,
                                                     PlompBouman=PlompBouman
                                                     )

    lng = all_ps.shape[1]

    # Band channels, ordered by centre frequency on the erb scale.
    def ctr_freq(x):
        return x[1]['erb']
    ecbsorted = sorted([(i,ec) for (i,ec) in enumerate(all_ectrs) if ec['type']=='band'],
                       key=ctr_freq)
    band_indices = [i for (i,ec) in ecbsorted]
    ectrs = [ec for (i,ec) in ecbsorted]
    neural = all_ps.take(band_indices, axis=0)

    assert neural.shape[1]==lng
    assert len(data.shape) == 1
    assert abs(lng*Dt-data.shape[0]*dt) < 0.1*data.shape[0]*dt

    # Global level normalization, applied in place to the bands and their average.
    neural_now = numpy.average(neural, axis=0)
    assert neural_now.shape[0] == lng
    neural_avg = numpy.square(neural_now).sum()/neural_now.sum()
    numpy.divide(neural, neural_avg, neural)
    numpy.divide(neural_now, neural_avg, neural_now)
    if t_kernel is not None:
        for f in range(neural.shape[0]):
            neural[f,:] = numpy.convolve(neural[f,:], t_kernel, mode="same")
    if f_kernel is not None:
        for t in range(lng):
            neural[:,t] = numpy.convolve(neural[:,t], f_kernel, mode="same")

    # Everything that is not a band is passed through untouched.
    nband_indices = [i for (i,ec) in enumerate(all_ectrs) if ec['type']!='band']
    nneural = all_ps.take(nband_indices, axis=0)
    nectrs = [ec for ec in all_ectrs if ec['type']!='band']

    # Per-frame normalization of the bands.
    nns = convolve(neural_now, norm_kernel)
    norm = numpy.hypot(nns, Nsv * numpy.square(nns).sum()/nns.sum())

    numpy.divide(neural, norm[numpy.newaxis,:], neural)

    assert neural.shape[0]+nneural.shape[0] == all_ps.shape[0]
    assert nneural.shape[1]==neural.shape[1]
    assert nneural.shape[0] < nneural.shape[1]
    assert len(nectrs) == nneural.shape[0]
    # Sanity check: nectrs must hold only non-band channels.  The original
    # asserted the opposite (== 'band'), which failed whenever any non-band
    # channel was present.
    for e in nectrs:
        assert e['type'] != 'band'

    return (t0, neural, ectrs, nneural, nectrs)
659