1
2
3 """Feature vectors for optimized DTW alignment.
4 Exemplar project with Ladan Baghai-Ravary.
5 """
6
7 import math
8 import numpy
9 from gmisclib import die
10 from gmisclib import erb_scale
11 from gpk_voicing import percep_spec
12 from gpk_voicing import fv_misc as M
13 from gpk_voicing import zero as Z
# Alias of fv_misc.SillyWidthException, made available under this module's
# namespace.
SillyWidthException = M.SillyWidthException

# Passed as the sixth positional argument to percep_spec.perceptual_spec()
# in feature_vec().  NOTE(review): presumably the band spacing on the ERB
# scale -- confirm against percep_spec.
DB = 0.85
17
18
19
20
21
22
23
def feature_vec(data, dt, DT,
                LF=1.0, Nsv=M.NSV, ELF=1.0,
                do_voicing=1, do_dissonance=False,
                PlompBouman=False, do_pdur=False, zerosub=0.0):
    """Compute a list of feature vectors from a perceptual spectrum.

    The signal is run through C{percep_spec.perceptual_spec}, the result
    is normalized by a loudness-weighted average of the 'band' channels,
    and then the C{fv_misc} feature families (vowel_edge, fricative,
    optionally haspitch, and peakiness) are evaluated and concatenated.

    @param data: signal, forwarded unchanged to C{perceptual_spec}.
    @param dt: forwarded unchanged to C{perceptual_spec}
        (presumably the input sampling interval -- confirm).
    @param DT: forwarded to C{perceptual_spec}; also the divisor for all
        window widths below, and returned unchanged.  Must be positive.
    @param LF: positive scale factor for the vowel_edge window
        (C{0.04*LF/DT}) and the haspitch window (C{0.02*sqrt(LF)/DT}).
    @param Nsv: forwarded to every C{fv_misc} feature function.
    @param ELF: scale factor for the fricative and peakiness windows
        (C{0.03*ELF/DT}).
    @param do_voicing: passed to C{perceptual_spec} as C{do_mod}; when
        true the haspitch features are added to the output.
    @param do_dissonance: must be false (asserted).
    @param PlompBouman: forwarded unchanged to C{perceptual_spec}.
    @param do_pdur: must be false (asserted).
    @param zerosub: when non-zero, a per-band zero level from
        C{zero.percep_spec_zero} is subtracted from the band channels
        before the normalization factor is computed.  Only tested
        against zero; its magnitude is not used.
    @return: C{(vectors, descriptors, DT, t0)}: a list of 1-D arrays,
        each as long as the spectrum's second axis, a parallel list of
        descriptors, the C{DT} argument, and the C{t0} value reported by
        C{perceptual_spec}.
    """
    assert not do_pdur and not do_dissonance
    formant_lo = erb_scale.f_to_erb(120.0)
    formant_hi = erb_scale.f_to_erb(5000.0)
    assert float(DT) > 0.0
    assert float(LF) > 0.0
    erb_lo = erb_scale.f_to_erb(80.0)
    erb_hi = erb_scale.f_to_erb(6000.0)
    ectrs, neural, t0 = percep_spec.perceptual_spec(
        data, dt, DT, erb_lo, erb_hi, DB,
        do_mod=do_voicing,
        do_dissonance=do_dissonance,
        do_peakalign=True,
        PlompBouman=PlompBouman
    )

    assert 1 < neural.shape[0] < 200

    # Pull out the rows that are plain spectral bands; take() hands back
    # a fresh array, so edits to it leave `neural` untouched.
    band_rows = [k for (k, ch) in enumerate(ectrs) if ch['type'] == 'band']
    bands = neural.take(band_rows, axis=0)
    assert bands.shape[1] == neural.shape[1]

    if zerosub != 0.0:
        bands = bands.copy()
        try:
            zero_level = Z.percep_spec_zero(bands)
        except Z.ZeroProblem:
            # Best effort: carry on without any zero subtraction.
            die.warn("Bad utterance: zero does not converge.")
            zero_level = numpy.zeros((bands.shape[0],))
        assert bands.shape[0] == zero_level.shape[0]
        numpy.subtract(bands, zero_level[:, numpy.newaxis], out=bands)
    assert bands.shape[1] == neural.shape[1]
    assert bands.shape[0] <= neural.shape[0]

    # Band-averaged level for each frame.
    neural_now = numpy.average(bands, axis=0)
    assert neural_now.shape[0] == neural.shape[1]
    # Scalar: average level, weighted by the level itself.
    neural_avg = numpy.sum(neural_now**2) / numpy.sum(neural_now)

    # Normalize everything (in place) by that overall level.
    numpy.divide(neural, neural_avg, out=neural)
    numpy.divide(neural_now, neural_avg, out=neural_now)

    # Some channel types get an additional division on top of the
    # global one: once more for haspitch/dissonance, by the square for
    # peakalign.
    for (row, ch) in enumerate(ectrs):
        kind = ch['type']
        if kind in ('haspitch', 'dissonance'):
            numpy.divide(neural[row, :], neural_avg, out=neural[row, :])
        elif kind == 'peakalign':
            numpy.divide(neural[row, :], neural_avg**2, out=neural[row, :])

    vectors = []
    descriptors = []

    def _collect(group):
        # Append one feature family and check the two lists stay parallel.
        vecs, names = group
        vectors.extend(vecs)
        descriptors.extend(names)
        assert len(descriptors) == len(vectors), "Descriptor mismatch"

    width = 0.04*LF/DT
    _collect(M.vowel_edge(width, ectrs, neural, neural_now, Nsv,
                          do_abs=False,
                          formant_low=formant_lo,
                          formant_high=formant_hi
                          ))

    width = 0.03*ELF/DT
    _collect(M.fricative(width, ectrs, neural, neural_now, Nsv))

    if do_voicing:
        width = 0.02*math.sqrt(LF)/DT
        _collect(M.haspitch(width, ectrs, neural, neural_now, Nsv))

    width = 0.03*ELF/DT
    _collect(M.peakiness(width, ectrs, neural, neural_now, Nsv))

    # Every feature must have exactly one value per frame.
    n_frames = neural.shape[1]
    for (idx, (vec, name)) in enumerate(zip(vectors, descriptors)):
        assert vec.shape == (n_frames,), "Wrong size: %d, %s = %d vs. %d" % (idx, str(name), vec.shape[0], n_frames)

    return (vectors, descriptors, DT, t0)
113