Source Code for Module lib.fv201105

#!/usr/bin/env python

"""Duration estimator for speech.

Usage: s_slope [flags]
Flags:  -f FFF     opens file FFF for reading.
        -c XXX     selects column XXX (either a data name or an integer).
        -o FFF     sets the output file.
        -dt #.##   sets the interval for calculating the output.

It takes a local spectrum, bins it onto the Bark scale, and converts it
to perceptual loudness (via **E).  Then it computes a measure of how far
you can go from each point before the spectrum changes too much.
"""
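# Illustration only (not part of the original module): the "**E" loudness
# compression mentioned in the docstring is a power-law step applied to
# per-band spectral energies.  A minimal sketch, assuming `band_energy` is a
# hypothetical numpy array of band energies:
#
#     loudness = band_energy ** 0.333
#
# The exponent 0.333 matches E in avg_spec() below; the per-band values this
# module actually works with come from FVM.normalize_neural() inside
# feature_vec_guts().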
 17   
 18  import math 
 19  import numpy 
 20  # from gmisclib import die 
 21  from gmisclib import cache as CC 
 22  from gmisclib import gpkmisc 
 23  from gmisclib import erb_scale 
 24   
 25  from gpk_voicing import percep_spec as PS 
 26  from gpk_voicing import fv_misc as FVM 
 27  from gpk_voicing import voice_misc as VM 
 28  SillyWidthException = FVM.SillyWidthException 
 29  NSV = 0.75 
 30   
 31  _wincache = {} 
 32  _WIN_CACHE_SIZE = 20 
def win(n):
    key = n
    if len(_wincache) > _WIN_CACHE_SIZE:
        # Bound the cache size by evicting an arbitrary entry.
        _wincache.popitem()
    if key in _wincache:
        return _wincache[key]
    Norm = 1
    if not n > 0:
        raise SillyWidthException(n)

    def inner_window(n, k, norm=2):
        assert k == 0
        opn0 = VM.cont_kernel_c(n)
        w = opn0.P(0)
        # die.info("win.inner_window.w.shape=%s w0.shape=%s" % (w.shape, w0.shape))
        if norm > 0:
            numpy.divide(w, (numpy.absolute(w)**norm).sum()**(1.0/Norm), w)
        return w

    rv = VM.cont_kernel(n, 0, norm=Norm, kernel=inner_window)
    _wincache[key] = rv
    return rv

def avg_spec(b):
    ALPHA = -1.0
    E = 0.333
    f = erb_scale.erb_to_f(b)/1000.0
    bw = erb_scale.ebw(b)/100.0
    return (f**ALPHA * bw)**E

def feature_vec_guts(data, dt, Dt, do_irx=False,
                     LF=1.0, fdif2=0.0, fdif2x=0.0, Nsv=NSV,
                     cache_info=None, ps_cache_info=None
                     ):
    FORMANT_LOW = erb_scale.f_to_erb(200.0)
    FORMANT_HIGH = erb_scale.f_to_erb(6000.0)
    assert Dt > 0.0 and float(Dt) > 0.0
    bmin = FORMANT_LOW - 0.5*FVM.DB
    bmax = FORMANT_HIGH + 0.5*FVM.DB
    width = 0.04 * LF / Dt
    if do_irx:
        width = max(1, int(round(width)))
    cs = win(width)
    css = cs.sum()
    f_kernel = numpy.array([-fdif2x, -fdif2, 1+2*fdif2+2*fdif2x, -fdif2, -fdif2x])

    if ps_cache_info is not None:
        ic_i = ps_cache_info
    else:
        ic_i = cache_info

    t0, neural, ectrs, nneural, nectrs = \
        FVM.normalize_neural(data, dt, Dt, bmin=bmin, bmax=bmax, db=FVM.DB,
                             do_mod=0, do_dissonance=False,
                             PlompBouman=False,
                             norm_kernel=cs/css, Nsv=Nsv, t_kernel=cs/css,
                             f_kernel=f_kernel,
                             cache_info=ic_i
                             )
    o = []
    descr = []
    nsum = numpy.zeros((neural.shape[1],))
    nna = 0
    avss = 0.0
    # Average the in-range bands to form a single overall 'loudness' track.
    for (i, e) in enumerate(ectrs):
        if e['type'] == 'band' and FORMANT_LOW < e['erb'] < FORMANT_HIGH:
            numpy.add(nsum, neural[i, :], nsum)
            avss += avg_spec(e['erb'])
            nna += 1
    # The last band's descriptor (loop variable e) is reused as a template.
    dtmp = e.copy()
    del dtmp['erb']
    del dtmp['fc']
    dtmp['type'] = 'loudness'
    dtmp['width'] = width
    dtmp['Kentropy'] = gpkmisc.entropy(numpy.absolute(cs))
    dtmp['Fentropy'] = math.log(nna)
    dtmp['t_symmetry'] = 1
    dtmp['id'] = '%s:%d' % (dtmp['type'], width)
    descr.append(dtmp)
    o.append(nsum/nna)
    # Per-band 'vowel' features: each band's deviation from the scaled average spectrum.
    for (i, e) in enumerate(ectrs):
        if e['type'] == 'band' and FORMANT_LOW < e['erb'] < FORMANT_HIGH:
            tmp = neural[i, :] - nsum * (avg_spec(e['erb'])/avss)
            # pylab.plot(tmp)
            # print 'RMS', math.sqrt(Num.square(tmp).sum()/tmp.shape[0])
            dtmp = e.copy()
            dtmp['type'] = 'vowel'
            dtmp['width'] = width
            dtmp['Kentropy'] = gpkmisc.entropy(numpy.absolute(cs))
            dtmp['Fentropy'] = 0.0
            dtmp['t_symmetry'] = 1
            dtmp['a_scaling'] = 1
            # pylab.title('vowel %s' % dtmp['id'])
            # pylab.figure()
            o.append(tmp)
            dtmp['id'] = '%s:%d:%.1f' % (dtmp['type'], width, e['erb'])
            descr.append(dtmp)
    assert len(descr) == len(o), "Descriptor mismatch"
    return (o, descr, Dt, t0)

def feature_vec(data, dt, DT, LF=1.0, Nsv=NSV, fdif2=0.0, fdif2x=0.0,
                do_irx=False, cache_info=None, ps_cache_info=None):
    o = None
    ci = None
    if cache_info is not None:
        assert isinstance(cache_info, CC.cache_info)
        ci = cache_info.addinfo(dt, DT, LF, Nsv, fdif2, fdif2x, do_irx, 'fv201105-2')
        # Try to load a previously computed result from the cache.
        try:
            o = ci.load()
        except ci.Errors:
            pass
    if o is None:
        # Cache miss (or no cache): compute the features and write them back.
        o = feature_vec_guts(data, dt, DT, LF=LF, Nsv=Nsv,
                             do_irx=do_irx, fdif2=fdif2, fdif2x=fdif2x,
                             cache_info=cache_info, ps_cache_info=ps_cache_info
                             )
        if ci is not None:
            ci.bg_dump(o)
    return o
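# A minimal usage sketch (an illustration, not part of the original module):
# assuming `x` is a hypothetical 1-D numpy array of audio samples spaced
# `dt` seconds apart, and 0.01 s is a hypothetical output frame interval DT,
#
#     o, descr, Dt, t0 = feature_vec(x, dt, 0.01)
#
# would return `o`, a list of per-feature time series, and `descr`, the
# matching list of descriptor dicts (one 'loudness' entry plus one 'vowel'
# entry per band), as produced by feature_vec_guts() above.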

Opt_text = """
# RUN 1
# summarize_logs -uid UID -fromstart -best logS_advLImMDREf,-fv201102_4.av
# len(currentlist)= 26 tail= 0.0 nsamp= 26
# max(logS_advLImMDREf,-fv201102_4.av) = 376.74
# samples used = 1 filename= logS_advLImMDREf,-fv201102_4.av
376.74 logp
# n= 39
26.031 Scale,vowel:.*:10.4
0.650176 Scale,vowel:.*:11.1
0.994825 Scale,vowel:.*:11.9
3.65683 Scale,vowel:.*:12.6
12.2741 Scale,vowel:.*:13.3
0.23901 Scale,vowel:.*:14.0
1.53292 Scale,vowel:.*:14.7
7.75427 Scale,vowel:.*:15.4
0.784154 Scale,vowel:.*:16.1
3.23822 Scale,vowel:.*:16.8
3.84116 Scale,vowel:.*:17.5
12.1566 Scale,vowel:.*:18.2
6.9634 Scale,vowel:.*:18.9
4.90421 Scale,vowel:.*:19.6
3.72337 Scale,vowel:.*:20.3
0.0150866 Scale,vowel:.*:21.0
6.29358 Scale,vowel:.*:21.8
3.50677 Scale,vowel:.*:22.5
1.94812 Scale,vowel:.*:23.2
17.1486 Scale,vowel:.*:23.9
1.76959 Scale,vowel:.*:24.6
1.27658 Scale,vowel:.*:25.3
0.439394 Scale,vowel:.*:26.0
0.185877 Scale,vowel:.*:26.7
12.2699 Scale,vowel:.*:6.2
10.2536 Scale,vowel:.*:6.9
5.84667 Scale,vowel:.*:7.6
3.5585 Scale,vowel:.*:8.3
0.405896 Scale,vowel:.*:9.0
2.4074 Scale,vowel:.*:9.7
2.94796 lf,lf
-0.302882 norm,fdif2
1.28682 norm,nsv
4.824 norm,tdif
0.16849 norm,tdif2
#
# RUN 2
# summarize_logs -best -fromstart -uid UID logS_advLImMDREf,-fv201102_2.av
# len(currentlist)= 20 tail= 0.0 nsamp= 20
# max(logS_advLImMDREf,-fv201102_2.av) = 348.56
# samples used = 1 filename= logS_advLImMDREf,-fv201102_2.av
348.56 logp
# n= 39
0.367388 Scale,vowel:.*:10.4
0.668657 Scale,vowel:.*:11.1
3.73234 Scale,vowel:.*:11.9
9.59746 Scale,vowel:.*:12.6
45.2418 Scale,vowel:.*:13.3
12.4549 Scale,vowel:.*:14.0
5.18612 Scale,vowel:.*:14.7
8.10955 Scale,vowel:.*:15.4
16.1181 Scale,vowel:.*:16.1
8.2858 Scale,vowel:.*:16.8
6.15389 Scale,vowel:.*:17.5
2.11999 Scale,vowel:.*:18.2
8.04381 Scale,vowel:.*:18.9
3.69372 Scale,vowel:.*:19.6
0.302413 Scale,vowel:.*:20.3
3.42239 Scale,vowel:.*:21.0
18.7842 Scale,vowel:.*:21.8
7.25486 Scale,vowel:.*:22.5
0.435129 Scale,vowel:.*:23.2
2.46633 Scale,vowel:.*:23.9
0.828264 Scale,vowel:.*:24.6
0.604598 Scale,vowel:.*:25.3
15.0723 Scale,vowel:.*:26.0
2.12652 Scale,vowel:.*:26.7
0.258391 Scale,vowel:.*:6.2
3.00883 Scale,vowel:.*:6.9
19.7981 Scale,vowel:.*:7.6
0.098635 Scale,vowel:.*:8.3
0.144953 Scale,vowel:.*:9.0
21.9714 Scale,vowel:.*:9.7
3.96667 lf,lf
-0.523753 norm,fdif2
0.703901 norm,nsv
4.49354 norm,tdif
-0.147517 norm,tdif2
4000 FORMANT_HIGH
"""

Scale = FVM.scale_xform(Opt_text,
                        name="2011-04-17 optimization on mace and cayenne:/home/gpk/ItakuraSaitoDistance/IS-DTW/logS_advLImMDREf,-fv201102/{1,2}"
                        )