1 """This is code that, when given a non-negative time-series
2 corresponding to speech loudness, computes the background noise,
3 which is assumed to be constant.
4 It uses a simple model based on a Gaussian and exponential."""
5
6 import math
7 import numpy
8 import fpconst
9 from gmisclib import die
10 from gmisclib import load_mod
11 from gmisclib import Numeric_gpk
12
13
14
18
19
20 PASSLIM = 80
21
23 return numpy.exp(numpy.maximum(x, -50.0))
24
26 """Models the loudness as being drawn either from a Gaussian
27 distribution (corresponding to the background noise level)
28 or a positive exponential distribtion (corresponding to speech).
29 It returns parameters of these distributions.
30
31 @param x: a time-series of loudness.
32 @return: four floats: (1) the estimate of the background noise level,
33 (2) the width of the noise Gaussian, (3) the mean speech level,
34 (4) the fraction of time with no speech (just background noise).
35 return (zz, sz, e, Pz)
36 @rtype: tuple(float, float, float, float)
37 """
38 zz = Numeric_gpk.N_minimum(x)
39 sigma2 = numpy.var(x)
40 if not (sigma2 > 0.0):
41 raise ZeroProblem, "Loudness zero has sigma=zero -- seemingly no signal here."
42 sz2 = sigma2/64.0
43 sv2 = sigma2 * 2.0
44
45
46 fz = 0.5
47 ipass = 0
48 q = 1000.0
49 while ipass<PASSLIM and q>0.0001:
50 assert 0.0 <= fz <= 1.0
51 ozz = zz
52 osz2 = sz2
53 osv2 = sv2
54 ofz = fz
55
56 gzz = numpy.greater(x, zz)
57 assert sv2 > 0.0
58 assert sz2 > 0.0
59 wv = gzz * exp(-numpy.square(x-zz)/(2*sv2))/math.sqrt(2*math.pi*sv2)
60 wz = exp(-numpy.square(x-zz)/(2*sz2))/math.sqrt(2*math.pi*sz2)
61
62
63 flatfudge = exp(-numpy.square(x[1:]-x[:-1])/(32*sz2))
64 numpy.multiply(wz[1:], flatfudge, wz[1:])
65 numpy.multiply(wz[:-1], flatfudge, wz[:-1])
66 assert 0.0 < fz < 1.0
67 pz = fz*wz / (fz*wz + (1-fz)*wv)
68
69 speed = math.exp(-2.5*ipass/float(PASSLIM))
70 fz = (1-speed)*fz + speed*min(0.99, max(0.01, numpy.average(pz)))
71 pv = 1.0 -pz
72 sumPv = numpy.sum(pv)
73 sumPz = numpy.sum(pz)
74 if fpconst.isNaN(sumPz) or fpconst.isNaN(sumPv):
75 raise ZeroProblem, "NaN while computing zero: sumPz sumPv: ipass=%d" % ipass
76 zz = (1-speed)*zz + speed * numpy.sum(pz*x) / sumPz
77 sz2 = max(1e-4*sigma2, (1-speed)*sz2 + speed*numpy.sum(pz*numpy.square(x-zz))/sumPz)
78 sv2 = max(sz2, (1-speed)*sv2 + speed*numpy.sum(pv*numpy.square(x-zz))/sumPv)
79 if debug is not None:
80 debug(x, pz, zz, fz, sz2, sv2, ipass)
81
82 q = (ozz-zz)**2/(sz2+osz2) + math.log(sz2/osz2)**2 + math.log(sv2/osv2)**2 + math.log(fz/ofz)**2
83 ipass += 1
84 if debug is not None:
85 debug(x, pz, zz, fz, sz2, sv2, -1)
86 sz = math.sqrt(sz2)
87 sv = math.sqrt(sv2)
88 if ipass >= PASSLIM:
89 raise ZeroProblem, "Loudness zero does not converge: %g %g %g %g" % (zz, sz, sv, fz)
90 if sv<3*sz or zz<0.10*sz or zz>sv/2.0 or fz>0.9:
91 raise ZeroProblem, "Strange loudness zero: %g %g %g %g" % (zz, sz, sv, fz)
92 return (zz, sz, sv, fz)
93
94
96 """@param x: a time-series of loudness.
97 @return: An estimate of the background noise level
98 @rtype: float
99 """
100 zz, sz, e, Pz = find_zero(x, debug=debug)
101 return zz
102
103
105 return numpy.maximum(x**3 - loud_zero(x)**3, 0.0)**(1./3.)
106
107
109 assert len(x.shape)==2 and x.shape[0]<60
110 loud = numpy.sum(x, axis=0)
111 zz, sz, e, Pz = find_zero(loud)
112 fz = Pz.sum()
113 return numpy.sum(x*Pz[numpy.newaxis,:], axis=1)/fz
114
115
117 pylab = load_mod.load_named('pylab')
118 for i in range(x.shape[0]):
119 pylab.plot(x[i,:])
120 pylab.title(str(ex))
121 pylab.show()
122
123
124 if __name__ == '__main__':
125 import sys
126 import pylab
127 import gpkimgclass
128 d = gpkimgclass.read(sys.argv[1])
129 x = d.column(0)/numpy.average(d.column(0))
130 - def debug(x, pz, zz, fz, sz2, sv2, i):
131 if debug < 0:
132 return
133 pylab.figure(i)
134 pylab.plot(x, 'b-')
135 pylab.plot(pz, 'r-')
136 pylab.title('zz=%g fz=%.3f sz=%.3f sv=%.3f, i=%d' % (zz, fz, math.sqrt(sz2), math.sqrt(sv2), i))
137 try:
138 zz, sz, sv, pz = find_zero(x, debug)
139 except ZeroProblem, x:
140 print "Zero problem: %s" % x
141 pylab.show()
142