1
2
3 """Feature vectors for optimized DTW alignment.
4 Exemplar project with Ladan Baghai-Ravary.
5 """
6
7 import math
8 import numpy
9 from gmisclib import die
10 from gmisclib import erb_scale
11
12 from gpk_voicing import cached_ps as PS
13 from gpk_voicing import fv_misc as M
14 from gpk_voicing import zero as Z
# Module-level alias of the exception class defined in fv_misc; presumably
# kept here so callers can catch it via this module -- TODO confirm.
SillyWidthException = M.SillyWidthException

# Handed to PS.perceptual_spec() right after the lower/upper band limits in
# feature_vec(); presumably the band spacing on the ERB scale -- TODO confirm.
DB = 0.85
18
# Verbatim output of an optimization run, kept as text and handed unchanged to
# M.scale_xform() below.  The non-comment lines are "<value> <key>" pairs
# (one "logp" line plus 44 parameter lines, matching the "# n= 44" header).
# NOTE(review): the lines starting with '#' look like optimizer log headers;
# presumably M.scale_xform() ignores them -- confirm before editing the text.
Opt_text="""# Trigger="run_to_bottom finished"
# Trigger match
# len(currentlist)= 198 tail= 0.0 nsamp= 198
# max(log_adv,,-fv200909opt_0.av) = 43.27
# samples used = 198 filename= log_adv,,-fv200909opt_0.av
43.27 logp
# n= 44
1.73693 dist,alpha
2.13363 dist,beta
1.08861 len_exp
1.55219 lf,elf
0.154333 lf,lf
0.140556 nsv
3.53085 scale,fricative%Sedge:.*:13.0-18.8
2.8547 scale,fricative%Sedge:.*:18.8-23.0
3.30231 scale,fricative%Sedge:.*:2.5-13.0
-3.87654 scale,fricative%Sedge:.*:23.0-27.1
-0.0238129 scale,fricative%Sedge:.*:27.1-30.7
-2.76415 scale,haspitch1:.*
-3.13785 scale,peakalign1:.*
-2.65753 scale,vowel:.*:10.4
2.32077 scale,vowel:.*:11.3
-1.12553 scale,vowel:.*:12.1
0.0529135 scale,vowel:.*:13.0
-0.568936 scale,vowel:.*:13.8
0.622019 scale,vowel:.*:14.7
0.18557 scale,vowel:.*:15.5
1.24299 scale,vowel:.*:16.4
-3.96151 scale,vowel:.*:17.2
0.793534 scale,vowel:.*:18.1
0.0448047 scale,vowel:.*:18.9
1.33993 scale,vowel:.*:19.8
2.00955 scale,vowel:.*:20.6
-0.858686 scale,vowel:.*:21.5
3.48095 scale,vowel:.*:22.3
-3.47638 scale,vowel:.*:23.2
-0.966758 scale,vowel:.*:24.0
-1.98064 scale,vowel:.*:24.9
2.39958 scale,vowel:.*:25.7
0.406809 scale,vowel:.*:26.6
2.68029 scale,vowel:.*:27.4
-2.71892 scale,vowel:.*:28.3
0.715675 scale,vowel:.*:29.1
1.1363 scale,vowel:.*:30.0
-3.78759 scale,vowel:.*:4.5
1.14752 scale,vowel:.*:5.3
-0.108174 scale,vowel:.*:6.2
-1.18802 scale,vowel:.*:7.0
-0.0958434 scale,vowel:.*:7.9
0.643672 scale,vowel:.*:8.7
2.52746 scale,vowel:.*:9.6
"""
# Transform built from the text above; `name` records where the optimization
# run came from.
Scale = M.scale_xform(Opt_text,
    name="299010 optimization in /proj/exemplar/ItakuraSaitoDistance/IS-DTW/log_adv,,-fv200909opt_0.av"
    )
74
76 return max(1, int(round(a)))
77
78
def feature_vec(data, dt, DT,
                LF=1.0, Nsv=M.NSV, ELF=1.0,
                do_voicing=1, do_dissonance=False,
                PlompBouman=False, do_pdur=False,
                zerosub=0.0, do_irx=True, cachename=None):
    """Build the list of feature vectors for one utterance.

    The perceptual spectrum of ``data`` is obtained from
    ``PS.perceptual_spec``, normalized by a single scalar average, and then
    handed to the feature generators in ``fv_misc`` (vowel, fricative edge,
    optionally haspitch, and peakiness).

    Parameters:
        data, dt: passed straight to ``PS.perceptual_spec`` (presumably the
            signal and its sampling interval -- TODO confirm).
        DT: passed to ``PS.perceptual_spec`` and used to turn window lengths
            into frame counts; must be positive.  Returned unchanged.
        LF: scales the window width for the vowel and haspitch features;
            must be positive.
        Nsv: forwarded unchanged to every ``fv_misc`` feature generator.
        ELF: scales the window width for the fricative-edge and peakiness
            features.
        do_voicing: forwarded as ``do_mod``; also enables the haspitch
            features when true.
        do_dissonance, do_pdur: unsupported here; both must be false.
        PlompBouman, cachename: forwarded to ``PS.perceptual_spec``.
        zerosub: when nonzero, the per-band offset computed by ``Z.PS`` is
            subtracted from the band rows before the loudness average is
            taken.  Only its being nonzero matters; the value is not used.
        do_irx: when true, window widths go through the module's ``_irx``
            helper; otherwise they are used as computed.

    Returns:
        ``(features, descriptors, DT, t0)`` where ``features`` and
        ``descriptors`` are parallel lists and ``t0`` is whatever
        ``PS.perceptual_spec`` reported.

    Raises:
        AssertionError: on unsupported options, non-positive ``DT``/``LF``,
            or inconsistent array shapes.
    """
    assert not (do_pdur or do_dissonance)
    formant_lo = erb_scale.f_to_erb(120.0)
    formant_hi = erb_scale.f_to_erb(6000.0)
    assert float(DT) > 0.0
    assert float(LF) > 0.0
    band_lo = erb_scale.f_to_erb(80.0)
    band_hi = erb_scale.f_to_erb(6000.0)
    ectrs, neural, t0 = PS.perceptual_spec(
        data, dt, DT, band_lo, band_hi, DB,
        do_mod=do_voicing,
        do_dissonance=do_dissonance,
        do_peakalign=True,
        PlompBouman=PlompBouman,
        cachename=cachename
    )

    assert 1 < neural.shape[0] < 200

    # Only the rows tagged 'band' take part in the loudness average.
    band_rows = [k for (k, chan) in enumerate(ectrs) if chan['type'] == 'band']
    bands = neural.take(band_rows, axis=0)
    assert bands.shape[1] == neural.shape[1]

    if zerosub != 0.0:
        bands = bands.copy()
        try:
            offset = Z.PS(bands)
        except Z.ZeroProblem:
            # Fall back to "no offset" rather than abandoning the utterance.
            die.warn("Bad utterance: zero does not converge.")
            offset = numpy.zeros((bands.shape[0],))
        assert bands.shape[0] == offset.shape[0]
        bands -= offset[:, numpy.newaxis]
    assert bands.shape[1] == neural.shape[1]
    assert bands.shape[0] <= neural.shape[0]

    # Average across the band rows, leaving one value per column of `neural`.
    neural_now = numpy.average(bands, axis=0)
    assert neural_now.shape[0] == neural.shape[1]
    # Scalar normalizer: sum of squares over plain sum of that average.
    neural_avg = (neural_now * neural_now).sum() / neural_now.sum()

    # NOTE(review): `neural` is rescaled in place; if PS.perceptual_spec can
    # hand back a shared array this alters it too -- confirm.
    neural /= neural_avg
    neural_now /= neural_avg

    # Rows of these types are divided again, on top of the global division
    # just applied to the whole array.
    extra_divisor = {
        'haspitch': neural_avg,
        'dissonance': neural_avg,
        'peakalign': neural_avg**2,
    }
    for (k, chan) in enumerate(ectrs):
        divisor = extra_divisor.get(chan['type'])
        if divisor is not None:
            row = neural[k, :]
            row /= divisor

    irx = _irx if do_irx else (lambda x: x)

    features = []
    descriptors = []

    def _collect(pair):
        # Append one generator's (vectors, descriptors) output to the lists.
        vectors, names = pair
        features.extend(vectors)
        descriptors.extend(names)
        assert len(descriptors) == len(features), "Descriptor mismatch"

    width = irx(0.04*LF/DT)
    _collect(M.vowel(width, ectrs, neural, neural_now, Nsv,
                     formant_low=formant_lo,
                     formant_high=formant_hi))

    width = irx(0.04*ELF/DT)
    _collect(M.fricative_edge(width, ectrs, neural, neural_now, Nsv,
                              do_abs=False))

    if do_voicing:
        width = irx(0.02*math.sqrt(LF)/DT)
        _collect(M.haspitch(width, ectrs, neural, neural_now, Nsv))

    width = irx(0.03*ELF/DT)
    _collect(M.peakiness(width, ectrs, neural, neural_now, Nsv))

    # Every feature vector must have one entry per column of `neural`.
    n_cols = neural[0].shape[0]
    for (k, (vec, label)) in enumerate(zip(features, descriptors)):
        assert vec.shape == (n_cols,), "Wrong size: %d, %s = %d vs. %d" % (k, str(label), vec.shape[0], n_cols)

    return (features, descriptors, DT, t0)
176