1
2
3 """Feature vectors for optimized DTW alignment.
4 Exemplar project with Ladan Baghai-Ravary.
5 """
6
7 import math
8 import numpy
9 from gmisclib import die
10 from gmisclib import cache as CC
11 from gmisclib import gpkmisc
12 from gmisclib import erb_scale
13
14 from gpk_voicing import cached_ps as PS
15 from gpk_voicing import fv_misc as M
16 from gpk_voicing import zero as Z
# Re-export so callers can catch this exception from this module directly.
SillyWidthException = M.SillyWidthException

# Spectral smearing / masking parameter handed to PS.perceptual_spec below.
# NOTE(review): exact semantics defined in gpk_voicing.cached_ps — confirm there.
DB = 0.95

# Output of a numerical optimization run (see header lines below for provenance).
# Each non-comment line is "<weight> <feature-name-pattern>"; the whole text is
# parsed by M.scale_xform into per-feature scale factors.  This is runtime data:
# do not edit the values or names.
Opt_text = """
# summarize_logs -uid UID -best -fromstart advdIrL,-fv201003opt_0.av
# len(currentlist)= 190 tail= 0.0 nsamp= 190
# max(advdIrL,-fv201003opt_0.av) = 84.47
# samples used = 190 filename= advdIrL,-fv201003opt_0.av
84.47 logp
# n= 41
3.02401 Scale,fricative%Sedge:.*:11.9-14.8
14.2481 Scale,fricative%Sedge:.*:15.7-17.6
16.3675 Scale,fricative%Sedge:.*:18.6-23.3
0.925029 Scale,fricative%Sedge:.*:24.3-29.0
6.3387 Scale,fricative%Sedge:.*:4.3-7.2
22.348 Scale,fricative%Sedge:.*:8.1-11.0
0.798545 Scale,haspitch1:.*
3.12358 Scale,vowel:.*:10.0
1.17288 Scale,vowel:.*:11.0
1.01182 Scale,vowel:.*:11.9
2.37575 Scale,vowel:.*:12.9
14.409 Scale,vowel:.*:13.8
1.01614 Scale,vowel:.*:14.8
3.41617 Scale,vowel:.*:15.7
2.49094 Scale,vowel:.*:16.7
7.17409 Scale,vowel:.*:17.6
2.301 Scale,vowel:.*:18.6
3.45347 Scale,vowel:.*:19.5
5.5532 Scale,vowel:.*:20.5
1.77577 Scale,vowel:.*:21.4
0.365305 Scale,vowel:.*:22.4
5.62289 Scale,vowel:.*:23.3
2.07074 Scale,vowel:.*:24.3
0.757549 Scale,vowel:.*:25.2
2.91112 Scale,vowel:.*:26.2
3.47592 Scale,vowel:.*:27.1
1.79999 Scale,vowel:.*:28.1
10.2112 Scale,vowel:.*:29.0
1.37166 Scale,vowel:.*:4.3
1.8372 Scale,vowel:.*:5.3
0.646851 Scale,vowel:.*:6.2
0.480479 Scale,vowel:.*:7.2
12.4389 Scale,vowel:.*:8.1
6.54689 Scale,vowel:.*:9.1
0.0053143 border_shift,beginning
2.98189 dist,alpha
0.619647 dist,beta
1.35488 len_exp
0.468854 lf,elf
0.00879217 lf,lf
0.00784977 nsv
"""

# Callable/object that maps feature descriptors to the optimized weights above.
Scale = M.scale_xform(Opt_text,
			name="2010-06-05 optimization in cayenne:/home/gpk/ItakuraSaitoDistance/IS-DTW/dvdIrL,-fv201003opt_0.av"
			)
74
75
def feature_vec_guts(data, dt, DT,
			LF=1.0, Nsv=M.NSV, ELF=1.0,
			do_voicing=0, do_dissonance=False,
			PlompBouman=False, do_pdur=False,
			zerosub=0.0, do_irx=False, E=None, cache_info=None):
	"""Compute the feature vectors used for DTW alignment of one utterance.

	data -- audio samples; dt -- sampling interval of data (seconds,
	presumably -- TODO confirm); DT -- output frame step.
	LF, ELF -- scale factors on the smoothing widths of the vowel /
	haspitch and edge / peakiness features respectively.
	Nsv -- number of singular vectors (passed through to fv_misc helpers).
	do_voicing -- if true, also compute haspitch and peakiness features.
	do_dissonance, do_pdur, do_irx -- accepted for interface compatibility
	only; this implementation asserts they are false.
	zerosub -- if nonzero, estimate and subtract a per-band zero level.
	E, cache_info -- passed through to PS.perceptual_spec.

	Returns (o, descr, DT, t0): a list of 1-D per-frame feature arrays,
	the matching list of descriptors, the frame step, and the start time
	reported by the perceptual-spectrum computation.
	"""
	# These modes are not supported by this feature set.
	assert not do_irx, "Do_irx=%s" % do_irx
	assert not do_pdur and not do_dissonance
	# Frequency range (on the ERB scale) searched for formant-like features.
	FORMANT_LOW = erb_scale.f_to_erb(130.0)
	FORMANT_HIGH = erb_scale.f_to_erb(5000.0)
	assert float(DT)>0.0
	assert float(LF)>0.0
	# Analysis band edges for the perceptual spectrum, on the ERB scale.
	bmin = erb_scale.f_to_erb(100.0)
	bmax = erb_scale.f_to_erb(5500.0)
	# ectrs: per-channel descriptors (dicts with a 'type' key);
	# neural: 2-D array, one row per channel, one column per frame;
	# t0: time of the first frame.
	ectrs, neural, t0 = PS.perceptual_spec(data, dt, DT,
						bmin, bmax, DB,
						do_mod=do_voicing,
						do_dissonance=do_dissonance,
						do_peakalign=False,
						PlompBouman=PlompBouman,
						e=E,
						cache_info = cache_info.copy() if cache_info is not None else None
						)

	# Sanity limit on the number of channels.
	assert 1 < neural.shape[0] < 200
	# Select only the plain filter-bank ('band') channels.
	band_indices = [i for (i,ec) in enumerate(ectrs) if ec['type']=='band']
	neural_b = neural.take(band_indices, axis=0)
	assert neural_b.shape[1]==neural.shape[1]
	if zerosub != 0.0:
		# NOTE(review): zerosub acts only as an on/off flag here -- the
		# estimated zero level z is subtracted at full strength regardless
		# of zerosub's magnitude.  Confirm that is intended.
		neural_b = neural_b.copy()
		try:
			z = Z.PS(neural_b)
		except Z.ZeroProblem:
			# Best-effort: fall back to no zero correction.
			die.warn("Bad utterance: zero does not converge.")
			z = numpy.zeros((neural_b.shape[0],))
		assert neural_b.shape[0]==z.shape[0]
		# In-place subtraction of the per-band zero level from every frame.
		numpy.subtract(neural_b, z[:,numpy.newaxis], neural_b)
		assert neural_b.shape[1]==neural.shape[1]
		assert neural_b.shape[0]<=neural.shape[0]

	# Per-frame average over the band channels.
	neural_now = numpy.average(neural_b, axis=0)
	assert neural_now.shape[0] == neural.shape[1]
	# Single overall normalization constant: sum of squares over plain sum,
	# i.e. an energy-weighted average level of the utterance.
	neural_avg = numpy.square(neural_now).sum()/numpy.sum(neural_now)

	# Normalize the whole channel array in place...
	numpy.divide(neural, neural_avg, neural)
	# ...and the per-frame average to match.
	numpy.divide(neural_now, neural_avg, neural_now)


	# Some channel types get an extra division beyond the global one above,
	# so their net normalization is 1/neural_avg**2 (haspitch, dissonance)
	# or 1/neural_avg**3 (peakalign).  NOTE(review): presumably because
	# those channels scale quadratically/cubically with signal level --
	# confirm against cached_ps.
	for (i,e) in enumerate(ectrs):
		if e['type'] == 'haspitch':
			numpy.divide(neural[i,:], neural_avg, neural[i,:])
		if e['type'] == 'dissonance':
			numpy.divide(neural[i,:], neural_avg, neural[i,:])
		if e['type'] == 'peakalign':
			numpy.divide(neural[i,:], numpy.square(neural_avg), neural[i,:])

	# Accumulate feature arrays (o) and their descriptors (descr) in parallel.
	o = []
	descr = []
	# w: smoothing width in frames (a time constant in seconds divided by
	# the frame step DT, scaled by the tunable LF/ELF factors).
	w = 0.04*LF/DT
	tmpo, tmpd = M.vowel(w, ectrs, neural, neural_now, Nsv,
				formant_low=FORMANT_LOW,
				formant_high=FORMANT_HIGH
				)
	o.extend(tmpo)
	descr.extend(tmpd)
	assert len(descr)==len(o), "Descriptor mismatch"

	w = 0.04*ELF/DT
	tmpo, tmpd = M.fricative_edge(w, ectrs, neural, neural_now, Nsv,
					do_abs=False, version=2
					)
	o.extend(tmpo)
	descr.extend(tmpd)
	assert len(descr)==len(o), "Descriptor mismatch"

	if do_voicing:
		w = 0.02*math.sqrt(LF)/DT
		tmpo, tmpd = M.haspitch(w, ectrs, neural, neural_now, Nsv)
		o.extend(tmpo)
		descr.extend(tmpd)
		assert len(descr)==len(o), "Descriptor mismatch"

		w = 0.03*ELF/DT
		tmpo, tmpd = M.peakiness(w, ectrs, neural, neural_now, Nsv)
		o.extend(tmpo)
		descr.extend(tmpd)
		assert len(descr)==len(o), "Descriptor mismatch"

	# Every feature array must have one value per frame.
	N = neural[0].shape[0]
	for (i, (tmp, dsc)) in enumerate(zip(o, descr)):
		assert tmp.shape == (N,), "Wrong size: %d, %s = %d vs. %d" % (i, str(dsc), tmp.shape[0], N)

	return (o, descr, DT, t0)
169
170
def feature_vec(data, dt, DT,
			LF=1.0, Nsv=M.NSV, ELF=1.0,
			do_voicing=0, do_dissonance=False,
			PlompBouman=False, do_pdur=False,
			zerosub=0.0, do_irx=False, E=None, cache_info=None):
	"""Caching front end for feature_vec_guts().

	When cache_info is supplied, a cache key is derived from every
	parameter that affects the result (plus the tag 'fv201003opt');
	a previously stored result is returned if one can be loaded,
	otherwise the features are computed and stored under that key.
	See feature_vec_guts() for the meaning of the parameters and the
	shape of the returned (o, descr, DT, t0) tuple.
	"""
	ci = None
	result = None
	if cache_info is not None:
		assert isinstance(cache_info, CC.cache_info)
		ci = cache_info.addinfo(dt, DT, LF, Nsv, ELF, do_voicing, do_dissonance, PlompBouman,
					do_pdur, zerosub, do_irx, E, 'fv201003opt')
		# Best effort: any cache-read failure just forces a recompute.
		try:
			result = ci.load()
		except ci.Errors:
			result = None
	if result is None:
		result = feature_vec_guts(data, dt, DT, LF=LF, Nsv=Nsv, ELF=ELF, do_voicing=do_voicing,
						do_dissonance=do_dissonance, PlompBouman=PlompBouman,
						do_pdur=do_pdur, zerosub=zerosub, do_irx=do_irx, E=E,
						cache_info=cache_info
						)
		if ci is not None:
			ci.dump(result)
	return result
195