1
2
3 """Duration estimator for speech.
4
5 Usage: s_slope [flags]
6 Flags: -f FFF opens file FFF for reading.
7 -c XXX selects column XXX (either a data name or an integer).
8 -o FFF sets the output file.
9 -dt #.## Sets the interval for calculating the output.
10
11 It takes a local spectrum,
12 bins it onto the Bark scale,
13 converts to perceptual loudness (via **E).
14 Then, it computes a measure of how far you can go
15 from each point before the spectrum changes too much.
16 """
17
18 import numpy
19
20 from gmisclib import cache as CC
21 from gmisclib import gpkmisc
22 from gmisclib import erb_scale
23
24 from gpk_voicing import percep_spec as PS
25 from gpk_voicing import fv_misc as FVM
26 from gpk_voicing import voice_misc as VM
# Re-export of the exception type that win() raises when asked for a
# non-positive width, so callers can refer to it through this module.
SillyWidthException = FVM.SillyWidthException
# Default value of the Nsv argument of feature_vec() and feature_vec_guts(),
# which forward it to FVM.normalize_neural.
NSV = 0.75

# Memo table for win(); keys are (n, tdif, tdif2) tuples, values are kernels.
_wincache = {}
# Size threshold at which win() starts discarding cached kernels.
_WIN_CACHE_SIZE = 20
def win(n, tdif, tdif2):
    """Return the kernel for width n, building and caching it on first use.

    The kernel is produced by VM.cont_kernel with a custom inner window.
    That window is the sum of three terms obtained from VM.cont_kernel_c:
    tdif2 * P(2) at width n+4, tdif * P(1) at width n+2 and P(0) at width n.
    The two shorter terms are added into the interior of the longest one
    (slices [1:-1] and [2:-2] respectively).

    Results are memoised in the module-level _wincache, keyed by
    (n, tdif, tdif2).

    @param n: window width; must be greater than zero.
    @param tdif: weight of the P(1) term.
    @param tdif2: weight of the P(2) term.
    @return: whatever VM.cont_kernel returns for this window.
    @raise SillyWidthException: if n is not greater than zero.
    """
    key = (n, tdif, tdif2)
    # Look the key up first, so that making room for a new entry can never
    # throw away the very entry we are about to return.
    try:
        return _wincache[key]
    except KeyError:
        pass

    # Written as "not n > 0" so that a NaN width is rejected as well.
    if not n > 0:
        raise SillyWidthException(n)

    Norm = 1

    def inner_window(n, k, norm=2):
        assert k == 0
        opn2 = VM.cont_kernel_c(n+4)
        opn1 = VM.cont_kernel_c(n+2)
        opn0 = VM.cont_kernel_c(n)
        w = tdif2 * opn2.P(2)
        w1 = tdif * opn1.P(1)
        w0 = opn0.P(0)

        # Accumulate the shorter terms in place into the middle of w.
        numpy.add(w[1:-1], w1, w[1:-1])
        numpy.add(w[2:-2], w0, w[2:-2])
        if norm > 0:
            # NOTE(review): the root uses the enclosing Norm, not the norm
            # argument.  The two agree only if VM.cont_kernel forwards
            # norm=Norm to this function -- confirm before changing.
            numpy.divide(w, (numpy.absolute(w)**norm).sum()**(1.0/Norm), w)
        return w

    rv = VM.cont_kernel(n, 0, norm=Norm, kernel=inner_window)

    # dict.pop() requires a key, so the old no-argument call raised TypeError
    # as soon as the cache filled up; popitem() discards an arbitrary entry.
    if len(_wincache) >= _WIN_CACHE_SIZE:
        _wincache.popitem()
    _wincache[key] = rv
    return rv
58
59
def feature_vec_guts(data, dt, Dt, do_irx=False,
                     LF=1.0, fdif2=0.0, tdif=0.0, tdif2=0.0, Nsv=NSV,
                     FORMANT_LOW=None, FORMANT_HIGH=None,
                     cache_info=None, ps_cache_info=None
                     ):
    """Compute the 'vowel' feature vectors without any result caching.

    The data are passed through FVM.normalize_neural, and every returned
    channel whose descriptor has type 'band' and whose 'erb' value lies
    strictly between the two formant limits is kept.

    @param data: input passed unchanged to FVM.normalize_neural.
    @param dt: passed unchanged to FVM.normalize_neural
        (presumably the sampling interval of data -- confirm).
    @param Dt: passed unchanged to FVM.normalize_neural and returned;
        must be positive.
    @param do_irx: if true, the kernel width is rounded to an integer >= 1.
    @param LF: scale factor for the kernel width (width = 0.04 * LF / Dt).
    @param fdif2: builds the 3-point kernel [-fdif2, 1+2*fdif2, -fdif2]
        passed as f_kernel.
    @param tdif: passed to win().
    @param tdif2: passed to win().
    @param Nsv: passed unchanged to FVM.normalize_neural.
    @param FORMANT_LOW: lower limit, converted with erb_scale.f_to_erb;
        None means 200.0.
    @param FORMANT_HIGH: upper limit, converted with erb_scale.f_to_erb;
        None means 4000.0.
    @param cache_info: given to FVM.normalize_neural when ps_cache_info
        is None.
    @param ps_cache_info: if not None, given to FVM.normalize_neural in
        preference to cache_info.
    @return: (o, descr, Dt, t0).  o is a list of the selected rows
        neural[i, :]; descr is a parallel list of descriptor dictionaries;
        t0 is the first value returned by FVM.normalize_neural.
    """
    VFAC = 0.75
    FORMANT_LOW = erb_scale.f_to_erb(200.0 if FORMANT_LOW is None else FORMANT_LOW)
    FORMANT_HIGH = erb_scale.f_to_erb(4000.0 if FORMANT_HIGH is None else FORMANT_HIGH)
    assert Dt > 0.0 and float(Dt) > 0.0

    # Ask for half a channel spacing (FVM.DB) of margin on either side.
    # NOTE(review): the reason for the extra 0.0071793 on the low edge is
    # not visible in this file.
    bmin = FORMANT_LOW - 0.5*FVM.DB + 0.0071793
    bmax = FORMANT_HIGH + 0.5*FVM.DB

    width = 0.04 * LF / Dt
    if do_irx:
        width = max(1, int(round(width)))
    cs = win(width, tdif, tdif2)
    css = cs.sum()
    f_kernel = numpy.array([-fdif2, 1+2*fdif2, -fdif2])

    ic_i = ps_cache_info if ps_cache_info is not None else cache_info

    t0, neural, ectrs, _nneural, _nectrs = \
        FVM.normalize_neural(data, dt, Dt, bmin=bmin, bmax=bmax, db=FVM.DB,
                             do_mod=0, do_dissonance=False,
                             PlompBouman=False,
                             norm_kernel=cs/css, Nsv=Nsv, t_kernel=cs*(VFAC/css),
                             f_kernel=f_kernel,
                             cache_info=ic_i
                             )

    # The kernel is the same for every band, so its entropy is computed
    # once here rather than once per selected band.
    kernel_entropy = gpkmisc.entropy(numpy.absolute(cs))

    o = []
    descr = []
    for (i, e) in enumerate(ectrs):
        if e['type'] != 'band' or not (FORMANT_LOW < e['erb'] < FORMANT_HIGH):
            continue
        # Copy the descriptor so the entry in ectrs is left untouched.
        dtmp = e.copy()
        dtmp['type'] = 'vowel'
        dtmp['width'] = width
        dtmp['Kentropy'] = kernel_entropy
        dtmp['Fentropy'] = 0.0
        dtmp['t_symmetry'] = 1
        dtmp['a_scaling'] = 1
        dtmp['id'] = '%s:%d:%.1f' % (dtmp['type'], width, e['erb'])

        o.append(neural[i, :])
        descr.append(dtmp)
    assert len(descr) == len(o), "Descriptor mismatch"
    return (o, descr, Dt, t0)
113
114
def feature_vec(data, dt, DT, LF=1.0, Nsv=NSV, fdif2=0.0, tdif=0.0, tdif2=0.0,
                FORMANT_LOW=None, FORMANT_HIGH=None,
                do_irx=False, cache_info=None, ps_cache_info=None):
    """Compute the feature vectors, using a result cache when one is given.

    When cache_info is supplied, a cache entry keyed by the parameters is
    tried first.  On a miss (the load raises one of ci.Errors, or yields
    None) the result is computed by feature_vec_guts and then handed to
    ci.bg_dump.

    @param data: passed unchanged to feature_vec_guts.
    @param dt: passed unchanged to feature_vec_guts.
    @param DT: passed to feature_vec_guts as its Dt argument.
    @param LF: passed unchanged to feature_vec_guts.
    @param Nsv: passed unchanged to feature_vec_guts.
    @param fdif2: passed unchanged to feature_vec_guts.
    @param tdif: passed unchanged to feature_vec_guts.
    @param tdif2: passed unchanged to feature_vec_guts.
    @param FORMANT_LOW: passed unchanged to feature_vec_guts.
    @param FORMANT_HIGH: passed unchanged to feature_vec_guts.
    @param do_irx: passed unchanged to feature_vec_guts.
    @param cache_info: None, or a CC.cache_info instance used both for the
        result cache here and (via feature_vec_guts) further down.
    @param ps_cache_info: passed unchanged to feature_vec_guts.
    @return: the value returned by feature_vec_guts (or its cached copy).
    """
    o = None
    ci = None
    if cache_info is not None:
        assert isinstance(cache_info, CC.cache_info)
        key = [dt, DT, LF, Nsv, fdif2, tdif, tdif2, do_irx, 'fv201102']
        # The formant limits change the result, so they must be part of the
        # cache key; otherwise a call with one range could load the result
        # cached for another.  They are added only when non-default, which
        # keeps the key -- and so any existing cache entries -- unchanged
        # for default calls.
        if FORMANT_LOW is not None or FORMANT_HIGH is not None:
            key.append('formants=%r:%r' % (FORMANT_LOW, FORMANT_HIGH))
        ci = cache_info.addinfo(*key)
        try:
            o = ci.load()
        except ci.Errors:
            # Cache miss or unreadable entry: fall through and recompute.
            pass
    if o is None:
        o = feature_vec_guts(data, dt, DT, LF=LF, Nsv=Nsv,
                             do_irx=do_irx, fdif2=fdif2,
                             tdif=tdif, tdif2=tdif2,
                             FORMANT_LOW=FORMANT_LOW, FORMANT_HIGH=FORMANT_HIGH,
                             cache_info=cache_info, ps_cache_info=ps_cache_info
                             )
        if ci is not None:
            ci.bg_dump(o)
    return o
137
138
139
140
# Text output of two optimisation runs, embedded verbatim.  It is runtime
# data: the whole string is handed to FVM.scale_xform below, so do not
# reformat it.  Each data line is "<value> <name>".
# NOTE(review): lines starting with '#' are presumably ignored by
# FVM.scale_xform -- confirm against that function.
Opt_text = """
# RUN 1
# summarize_logs -uid UID -fromstart -best logS_advLImMDREf,-fv201102_4.av
# len(currentlist)= 26 tail= 0.0 nsamp= 26
# max(logS_advLImMDREf,-fv201102_4.av) = 376.74
# samples used = 1 filename= logS_advLImMDREf,-fv201102_4.av
376.74 logp
# n= 39
26.031 Scale,vowel:.*:10.4
0.650176 Scale,vowel:.*:11.1
0.994825 Scale,vowel:.*:11.9
3.65683 Scale,vowel:.*:12.6
12.2741 Scale,vowel:.*:13.3
0.23901 Scale,vowel:.*:14.0
1.53292 Scale,vowel:.*:14.7
7.75427 Scale,vowel:.*:15.4
0.784154 Scale,vowel:.*:16.1
3.23822 Scale,vowel:.*:16.8
3.84116 Scale,vowel:.*:17.5
12.1566 Scale,vowel:.*:18.2
6.9634 Scale,vowel:.*:18.9
4.90421 Scale,vowel:.*:19.6
3.72337 Scale,vowel:.*:20.3
0.0150866 Scale,vowel:.*:21.0
6.29358 Scale,vowel:.*:21.8
3.50677 Scale,vowel:.*:22.5
1.94812 Scale,vowel:.*:23.2
17.1486 Scale,vowel:.*:23.9
1.76959 Scale,vowel:.*:24.6
1.27658 Scale,vowel:.*:25.3
0.439394 Scale,vowel:.*:26.0
0.185877 Scale,vowel:.*:26.7
12.2699 Scale,vowel:.*:6.2
10.2536 Scale,vowel:.*:6.9
5.84667 Scale,vowel:.*:7.6
3.5585 Scale,vowel:.*:8.3
0.405896 Scale,vowel:.*:9.0
2.4074 Scale,vowel:.*:9.7
2.94796 lf,lf
-0.302882 norm,fdif2
1.28682 norm,nsv
4.824 norm,tdif
0.16849 norm,tdif2
#
# RUN 2
# summarize_logs -best -fromstart -uid UID logS_advLImMDREf,-fv201102_2.av
# len(currentlist)= 20 tail= 0.0 nsamp= 20
# max(logS_advLImMDREf,-fv201102_2.av) = 348.56
# samples used = 1 filename= logS_advLImMDREf,-fv201102_2.av
348.56 logp
# n= 39
0.367388 Scale,vowel:.*:10.4
0.668657 Scale,vowel:.*:11.1
3.73234 Scale,vowel:.*:11.9
9.59746 Scale,vowel:.*:12.6
45.2418 Scale,vowel:.*:13.3
12.4549 Scale,vowel:.*:14.0
5.18612 Scale,vowel:.*:14.7
8.10955 Scale,vowel:.*:15.4
16.1181 Scale,vowel:.*:16.1
8.2858 Scale,vowel:.*:16.8
6.15389 Scale,vowel:.*:17.5
2.11999 Scale,vowel:.*:18.2
8.04381 Scale,vowel:.*:18.9
3.69372 Scale,vowel:.*:19.6
0.302413 Scale,vowel:.*:20.3
3.42239 Scale,vowel:.*:21.0
18.7842 Scale,vowel:.*:21.8
7.25486 Scale,vowel:.*:22.5
0.435129 Scale,vowel:.*:23.2
2.46633 Scale,vowel:.*:23.9
0.828264 Scale,vowel:.*:24.6
0.604598 Scale,vowel:.*:25.3
15.0723 Scale,vowel:.*:26.0
2.12652 Scale,vowel:.*:26.7
0.258391 Scale,vowel:.*:6.2
3.00883 Scale,vowel:.*:6.9
19.7981 Scale,vowel:.*:7.6
0.098635 Scale,vowel:.*:8.3
0.144953 Scale,vowel:.*:9.0
21.9714 Scale,vowel:.*:9.7
3.96667 lf,lf
-0.523753 norm,fdif2
0.703901 norm,nsv
4.49354 norm,tdif
-0.147517 norm,tdif2
4000 FORMANT_HIGH
"""

# Module-level transform built from the optimiser output above; the name
# records where and when that optimisation was run.
Scale = FVM.scale_xform(Opt_text,
    name="2011-04-17 optimization on mace and cayenne:/home/gpk/ItakuraSaitoDistance/IS-DTW/logS_advLImMDREf,-fv201102/{1,2}"
    )
233