1
2
3 """Duration estimator for speech.
4
5 It takes a local spectrum,
6 bins it onto the Bark scale,
7 converts to perceptual loudness (via **E).
8 Then, it computes a measure of how far you can go
9 from each point before the spectrum changes too much.
10 """
11
12 import numpy
13 from gmisclib import cache as CC
14 from gmisclib import erb_scale
15
16 from gpk_voicing import cached_ps as PSC
17 from gpk_voicing import percep_spec as PS
18 from gpk_voicing import fv_misc as FVM
# Module-level alias for the exception class defined in fv_misc
# (presumably so callers can refer to it via this module -- confirm).
19 SillyWidthException = FVM.SillyWidthException
# Default value of the ``Nsv`` parameter of feature_vec().
20 NSV = 0.75
21
23 return max(1, int(round(a)))
24
28 FORMANT_LOW = erb_scale.f_to_erb(200.0)
29 FORMANT_HIGH = erb_scale.f_to_erb(4000.0)
30 assert Dt > 0.0 and float(Dt)>0.0
31 bmin = erb_scale.f_to_erb(100.0)
32 bmax = erb_scale.f_to_erb(6000.0)
33
34 if cache_info is not None:
35 assert isinstance(cache_info, CC.cache_info)
36 all_ectrs, all_ps, t0 = PSC.perceptual_spec(data, dt, Dt,
37 bmin=bmin, bmax=bmax, db=FVM.DB,
38 do_mod=0,
39 do_dissonance=False,
40 PlompBouman=False,
41 cache_info=cache_info.copy()
42 )
43 else:
44 all_ectrs, all_ps, t0 = PS.block_percep_spec(data, dt, Dt,
45 bmin=bmin, bmax=bmax, db=FVM.DB,
46 do_mod=0,
47 do_dissonance=False,
48 PlompBouman=False
49 )
50
51 band_indices = [i for (i,ec) in enumerate(all_ectrs) if ec['type']=='band']
52 neural = all_ps.take(band_indices, axis=0)
53 ectrs = [ec for ec in all_ectrs if ec['type']=='band']
54 nband_indices = [i for (i,ec) in enumerate(all_ectrs) if ec['type']!='band']
55 nneural = all_ps.take(nband_indices, axis=0)
56 nectrs = [ec for ec in all_ectrs if ec['type']!='band']
57
58 assert nneural.shape[1]==neural.shape[1]
59 assert neural.shape[1]==all_ps.shape[1]
60 assert neural.shape[0]+nneural.shape[0] == all_ps.shape[0]
61 assert len(data.shape) == 1
62 assert abs(all_ps.shape[1]*Dt-data.shape[0]*dt) < 0.1*data.shape[0]*dt
63
64 neural_now = numpy.average(neural, axis=0)
65 assert neural_now.shape[0] == neural.shape[1]
66 neural_avg = numpy.square(neural_now).sum()/numpy.sum(neural_now)
67 numpy.divide(neural, neural_avg, neural)
68 numpy.divide(neural_now, neural_avg, neural_now)
69
70 assert nneural.shape[0] < nneural.shape[1]
71 assert len(nectrs) == nneural.shape[0]
72 for (i,e) in enumerate(nectrs):
73 assert e['type'] == 'band'
74
75
76 w = 0.04 * LF / Dt
77 if do_irx:
78 w = _irx(w)
79 o, descr = FVM.vowel(w, ectrs, neural, neural_now, Nsv,
80 formant_low=FORMANT_LOW,
81 formant_high=FORMANT_HIGH
82 )
83 assert len(descr)==len(o), "Descriptor mismatch"
84 return (o, descr, Dt, t0)
85
86
def feature_vec(data, dt, DT, LF=1.0, Nsv=NSV, do_irx=True, cache_info=None):
    """Compute the duration feature vector, optionally through a cache.

    When ``cache_info`` is supplied, a cache entry keyed on
    ``(dt, DT, LF, Nsv)`` plus a tag is consulted first; on a miss the
    result is computed by ``feature_vec_guts`` and handed to the cache
    with ``bg_dump``.  Without ``cache_info`` the result is always computed.

    @param data: input signal, passed unchanged to ``feature_vec_guts``.
    @param dt: passed to ``feature_vec_guts`` and used in the cache key.
    @param DT: passed to ``feature_vec_guts`` and used in the cache key.
    @param LF: passed to ``feature_vec_guts`` and used in the cache key.
    @param Nsv: passed to ``feature_vec_guts`` and used in the cache key.
    @param do_irx: forwarded to ``feature_vec_guts``.  Previously this
        argument was accepted here but silently dropped, so the callee
        always ran with its own default.
    @param cache_info: C{None} to disable caching, otherwise a
        C{CC.cache_info} instance.
    @return: whatever ``feature_vec_guts`` returns (or the equal value
        loaded from the cache).
    """
    ci = None
    o = None
    if cache_info is not None:
        assert isinstance(cache_info, CC.cache_info)
        # do_irx changes the computed result, so it has to be part of the
        # cache key.  The historical tag is kept for the default (truthy)
        # case so existing cache entries stay valid; only the previously
        # unreachable do_irx=False variant gets a distinct tag.
        tag = 'fv_pdur' if do_irx else 'fv_pdur_noirx'
        ci = cache_info.addinfo(dt, DT, LF, Nsv, tag)
        try:
            o = ci.load()
        except ci.Errors:
            # Cache miss or unreadable entry: fall through and recompute.
            pass
    if o is None:
        o = feature_vec_guts(data, dt, DT, LF=LF, Nsv=Nsv,
                             do_irx=do_irx,
                             cache_info=cache_info
                             )
        # Only freshly computed results are written back to the cache.
        if ci is not None:
            ci.bg_dump(o)
    return o
104