Package lib :: Module emphasis
[frames] | no frames]

Source Code for Module lib.emphasis

  1  #!/usr/bin/env python 
  2   
  3  """Perceptual loudness measurement. 
  4  This is an implementation of 
  5  "Percieved Level of Noise by Mark VII and Decibels (E)" 
  6  S. S. Stevens, 
  7  J. Acoustical Soc. Am. v. 51(2, part 2) 1972 
  8  pages 575-602. 
  9   
 10  The algorithm matches the paper for FractOct==ThirdOct or OneOct. 
 11  The algorithm assumes that the level defined by SIXTY_EIGHT 
 12  corresponds to 68dB relative to 20 micro-Newtons per square meter 
 13  sound pressure. 
 14   
 15  USER INFORMATION: 
 16   
 17  You'll find, somewhere, a subdirectory named 
 18  speechresearch/voicing . 
 19  Within this is a script called emphasis_stevens.py . 
 20  This can be run as 
 21  python emphasis_stevens.py -o outputfile -write BITPIX=0 -c channelnumber inputfile 
 22  (there are a couple of other flags, too). 
 23   
 24  It will then read the input file (which is an audio recording) 
 25  and produce a time-series of the loudness.   Data input and 
 26  output is via the gpkio library, in .../speechresearch/gpkio. 
 27  That library has many virtues, but reading WAV files is not 
 28  one of them, so you have to convert .wav files to 
 29  the "GPK ASCII image" format (or one of several other formats), 
 30  using  .../speechresearch/lib/wavio.py . 
 31   
 32  The output will be in an ASCII version of the format, 
 33  which should be reasonably intelligible.    (The GPK ASCII image 
 34  format is based on the FITS astronomy format from NASA.) 
 35   
 36  However, that script uses the gpkio and gpklib libraries 
 37  (also under ../speechresearch).  These need to be compiled, 
 38  and it uses the gpk_img_python package that needs to be 
 39  installed via   "python setup.py install". 
 40  Oh, and .../speechresearch/gmisclib needs to be in 
 41  your PYTHONPATH. 
 42   
 43  Give it a try, and I'll be happy to help, and will 
 44  incorporate the troubles you have into some form of 
 45  documentation.    Sorry, I have nothing better yet. 
 46  """ 
 47   
 48  import math 
 49  import numpy 
 50  from gmisclib import Numeric_gpk 
 51  from gmisclib import die 
 52  from gmisclib import gpkmisc 
 53   
 54  from gpk_voicing import voice_misc 
 55   
 56   
 57   
 58  FAC = 0.5 
 59  ThirdOct = 2.0**(1.0/3.0) 
 60  OneOct = 2.0 
 61  FracOct = 2.0**0.7 
 62  Fmax = 12500.0 
 63  Fmin = 50.0 
 64  EXFAC = 2 
 65  E = 0.3333 
 66   
 67  # 1.0 would cheat on the low frequency end a little. 
 68  TAUFAC = 1.2 
 69   
 70   
 71  SIXTY_EIGHT = 6000.0 
 72  E = 0.3333 
 73   
 74   
 75   
76 -class extra_c:
77 - def __init__(self, fmin, fmax, frac_oct, ffac):
78 """ 79 @param ffac: How far apart should be frequency bins, relative to their width. 80 """ 81 self.frac_oct = frac_oct 82 self.fmin = fmin * frac_oct**0.5 # Put lower edge at fmin 83 fmax *= frac_oct**-0.5 # Put upper edge at fmax 84 self.n = int(round(math.log(fmax/fmin)/(math.log(frac_oct)*ffac))) 85 #: Fsum is correct at 68Db SOUND PRESSURE LEVEL AT 3150Hz 86 self.Fsum = 0.23*((1+math.log(self.frac_oct)/math.log(ThirdOct))/2.0) 87 self.Band_ctr = [ self.band_ctr(i*FAC) for i in range(self.n) ] 88 self.Band_h_width = [ 0.5*(self.band_ctr(i*FAC+0.5) - self.band_ctr(i*FAC-0.5)) for i in range(self.n) ] 89 self.ffac = ffac 90 self.Ear = self.calc_ear()
91
92 - def band_ctr(self, i):
93 return self.fmin * (self.frac_oct**i)
94 95
96 - def calc_ear(self):
97 """At 68dB SPL.""" 98 F0 = 50.0 99 app_A = [0.729, 1.33, 2.28, 2.92, 3.65, 4.47, 5.40, 100 6.40, 7.48, 8.64, 8.64, 8.64, 8.64, 8.64, 8.64, 101 10.1, 11.8, 13.7, 16.0, 16.0, 16.0, 16.0, 16.0, 102 11.8, 8.64 ] 103 o = numpy.zeros((self.n,)) 104 for i in range(self.n): 105 f = self.Band_ctr[i] 106 idx = math.log(f/F0)/math.log(self.frac_oct) 107 iidx = int(round(idx)) 108 # print i, f, iidx, len(app_A) 109 assert iidx>=0 and iidx<len(app_A) 110 fidx = idx - iidx 111 if abs(fidx) < 0.01: 112 o[i] = app_A[iidx] 113 elif fidx < 0: 114 assert i > 0 115 o[i] = app_A[iidx]*(1-abs(fidx)) + app_A[iidx-1]*abs(fidx) 116 else: 117 assert i+1 < len(app_A) 118 o[i] = app_A[iidx]*(1-abs(fidx)) + app_A[iidx+1]*abs(fidx) 119 return o
120 121 Fullband = extra_c(Fmin, Fmax, FracOct, FAC) 122 Speechband = extra_c(300.0, 4000.0, FracOct, FAC) 123
124 -def filter_fcn(f, fc, w):
125 Exp = 6 # 18 dB/octave 126 return 1.0/(((f-fc)/w)**Exp + 1)
127 128 129 _ff_cache = {}
130 -def cached_filter_fcn(f, fc, w):
131 key = (f.shape, fc, w) 132 try: 133 return _ff_cache[key] 134 except KeyError: 135 pass 136 tmp = filter_fcn(f, fc, w) 137 _ff_cache[key] = tmp 138 return tmp
139 140 141 142
143 -def one_loud(d, extra):
144 """Approximate loudness of the sound in data array d. 145 Extra contains misc. parameters.""" 146 147 n = d.shape[0] 148 dt = extra.dt 149 assert d.shape == (n,) 150 ss = numpy.fft.fft(Numeric_gpk.zero_pad_end(d*voice_misc.window(n, 0), EXFAC-1)) 151 ss[0] = 0 # Remove DC offset. 152 # print "# ss.shape=", ss.shape 153 f = numpy.absolute(voice_misc.fft_freq(ss.shape[0], d=dt)) 154 # assert abs(gpkmisc.N_maximum(f)-0.5/dt) < 0.05/dt 155 nyquist = 0.5/dt 156 q = numpy.zeros((extra.n,)) 157 for i in range( extra.n ): 158 fc = extra.Band_ctr[i] 159 w = extra.Band_h_width[i] 160 if fc+w > nyquist: 161 break 162 q[i] = numpy.square(numpy.absolute(ss * cached_filter_fcn(f, fc, w))).sum() 163 S = extra.Ear * (q**E) 164 Smax = S[S.argmax()] 165 Ssum = S.sum()/extra.ffac 166 loud = Smax + extra.Fsum*(Ssum - Smax) 167 return loud
168 169
170 -def printit(title, vec):
171 vt = ' '.join(["%6g"%q for q in vec]) 172 print title, vt
173 174
175 -def emphasis(data, extra, DT=0.01):
176 """Measure the time-series of loudness of a data array data. 177 DT is the sampling interval for the resulting loudness time series.""" 178 ns = int(math.floor(extra.dt*data.shape[0]/DT)) 179 o = numpy.zeros((ns,)) 180 for t in range(ns): 181 s, e = voice_misc.start_end(t*DT, extra.dt, extra.window, data.shape[0]) 182 o[t] = one_loud(data[s:e], extra) 183 return o
184 185
186 -def simple_emphasis(data, dt, DT=0.01, extra=Fullband):
187 extra.dt = dt 188 wmin = 2*extra.Band_h_width[0] 189 # Ideally, we'd use a different window size for each frequency... 190 taumax = max(TAUFAC / wmin, 1.5*DT) 191 extra.window = voice_misc.near_win_size(near=taumax/extra.dt, tol=0.1*taumax/extra.dt) 192 # print 'win', extra.window, taumax/extra.dt, wmin, extra.Band_h_width[0], extra.Band_ctr[0] 193 return emphasis(data/SIXTY_EIGHT, extra, DT)
194 195
196 -def speechband_loudness(data, dt, DT=0.01):
197 return simple_emphasis(data, dt, DT, extra=Speechband)
198