1
2
3 """Duration estimator for speech.
4
5 It takes a local spectrum, bins it onto the Bark scale,
6 converts to perceptual loudness (via **E).
7 Then, it computes a measure of how far you can go
8 from each point before the spectrum changes too much.
9 """
10
11 import math as M
12 import numpy
13 from gmisclib import die
14 from gmisclib import cache as CC
15 from gmisclib import gpkmisc
16 from gpk_voicing import fv_pdur as FVP
17 import gpk_voicing.fv_misc as FVM
18
# Reference duration that the 'log(pseudoduration)' output of pdur() is
# divided by before taking the logarithm.
# NOTE(review): presumably in seconds -- confirm against callers.
TYP_DUR = 0.1
20
21
def pdur_guts(s, t, dir, Dt, C):
    """Accumulate spectral similarity outward from frame C{t} in one direction.

    Starting at frame C{t}, this steps through C{s} one frame at a time in
    direction C{dir}.  Each step is weighted by C{exp(-sumdiff)}, where
    C{sumdiff} is the running sum of C{Dt*C*|s[i]-s[t]|**2}, treated as
    growing linearly across the step.  The walk stops at either end of
    C{s}, or once C{sumdiff} reaches 8.

    @param s: the normalized perceptual spectrum; a 2-D array indexed
        first by frame, with fewer than 200 columns.
    @param t: an integer; an index into the data.
    @param dir: step direction.  NOTE(review): callers pass +1 or -1;
        other values are not handled specially.
    @param Dt: multiplies both the per-step distance and the returned sums.
        NOTE(review): presumably the frame spacing -- confirm.
    @param C: scale factor applied to the squared spectral difference.
    @return: C{(length, center)}; the accumulated weight and the
        accumulated offset-weighted sum, each multiplied by C{Dt}.
    """
    n = len(s)
    assert s.shape[1] < 200, "Implausibly long feature vector. Is s transposed?"

    # Below this step size the closed-form slope term suffers catastrophic
    # cancellation, so a Taylor series is used instead.
    small = 1e-3

    sumdiff = 0.0
    # NOTE(review): these starting values look like the contribution of the
    # half frame adjacent to t (0.5 wide, first moment 0.125) -- confirm.
    len_sum = 0.5
    ctr_sum = 0.125*dir
    i = t + dir
    while 0 <= i < n and sumdiff < 8:
        delta_diff = Dt * C * numpy.square(numpy.absolute(s[i]-s[t])).sum()
        weight = M.exp(-sumdiff)
        if delta_diff <= 0:
            # Limit of the two integrals below as delta_diff -> 0.
            len_fac = 1.0
            slope_fac = 0.5
        else:
            # integral_0^1 exp(-d*x) dx = (1 - exp(-d))/d; expm1 keeps this
            # accurate for small d.
            len_fac = -M.expm1(-delta_diff)/delta_diff
            if delta_diff < small:
                # integral_0^1 x*exp(-d*x) dx = 1/2 - d/3 + d^2/8 - d^3/30 + ...
                slope_fac = 0.5 - delta_diff*(
                    1.0/3.0 - delta_diff*(0.125 - delta_diff/30.0))
            else:
                slope_fac = (1.0 - M.exp(-delta_diff)*(1 + delta_diff)) \
                    / delta_diff**2
            sumdiff += delta_diff
        lsd = weight*len_fac
        len_sum += lsd
        ctr_sum += (i - t - 0.5*dir)*lsd + dir*weight*slope_fac
        i += dir

    return (len_sum*Dt, ctr_sum*Dt)
63
64
def pdur(data, dt, Dt, out, c=None, lfac=None, Nsv=None, cache_info=None):
    """Compute a pseudo-duration (or center-time) track for C{data}.

    Feature vectors are obtained from C{FVP.feature_vec}; for every frame,
    C{pdur_guts} is run forwards and backwards and the two results are
    combined according to C{out}.

    Note that if C{cache_info} is non-Null, it *must* uniquely identify the data.

    @param data: input passed to C{FVP.feature_vec}; C{data.shape[0]*dt}
        must agree with the feature-vector duration to within 10%.
    @param dt: NOTE(review): presumably the sampling interval of C{data}.
    @param Dt: requested frame spacing; replaced by the value that
        C{FVP.feature_vec} returns.
    @param out: one of C{'pseudoduration'}, C{'log(pseudoduration)'} or
        C{'center_time'}; anything else is reported via C{die.die}.
    @param c: scale factor handed to C{pdur_guts}.
        NOTE(review): the default C{None} is multiplied by a number there,
        so callers seem to need to supply a value -- confirm.
    @param lfac: multiplier applied to the result in the two pseudoduration
        modes; unused for C{'center_time'}.
    @param Nsv: passed to C{FVP.feature_vec}; defaults to C{FVM.NSV}.
    @param cache_info: optional C{CC.cache_info}; when given, the unscaled
        result is loaded from / stored to the cache.
    @return: a 1-D numpy array with one value per frame.
    """

    if Nsv is None:
        Nsv = FVM.NSV
    o = None
    ci = None
    if cache_info is not None:
        assert isinstance(cache_info, CC.cache_info)
        ci = cache_info.addinfo(dt, Dt, c, Nsv, 'pseudoduration', out)
        try:
            o = ci.load()
        except ci.Errors:
            # Nothing usable in the cache: fall through and recompute.
            pass

    if o is None:
        sp, descr, Dt, t0 = FVP.feature_vec(data, dt, Dt, Nsv=Nsv, cache_info=cache_info)
        nfvc = len(sp)
        # pdur_guts indexes frames along the first axis.
        sp = numpy.transpose(sp)
        assert sp.shape[1] == nfvc

        ns = sp.shape[0]
        assert abs(ns*Dt - data.shape[0]*dt) < 0.1*ns*Dt
        # The numpy.float alias was removed in NumPy 1.24; builtin float is
        # the same dtype.
        o = numpy.zeros((ns,), float)
        if out == 'pseudoduration' or out == 'log(pseudoduration)':
            for i in range(ns):
                plp, pcp = pdur_guts(sp, i, 1, Dt, c)
                plm, pcm = pdur_guts(sp, i, -1, Dt, c)
                o[i] = plp + plm
        elif out == 'center_time':
            for i in range(ns):
                plp, pcp = pdur_guts(sp, i, 1, Dt, c)
                plm, pcm = pdur_guts(sp, i, -1, Dt, c)
                o[i] = Dt*(pcp + pcm)/(plp+plm)
        else:
            die.die('Whoops: out=%s' % out)

        # The cached value is stored before lfac scaling, so it does not
        # depend on lfac.
        if ci is not None:
            ci.dump(o)

    if out == 'pseudoduration':
        numpy.multiply(o, lfac, o)
    elif out == 'log(pseudoduration)':
        o = numpy.log((lfac/TYP_DUR)*o)

    return o
112