Module feature_vec
[frames] | no frames]

Source Code for Module feature_vec

  1  #!/usr/bin/env python 
  2   
  3  import os 
  4  import sys 
  5  import math as M 
  6  from gmisclib import Num 
  7  from gmisclib import die 
  8  # from gmisclib import erb_scale 
  9  from gmisclib import avio 
 10  from gmisclib import cache as CC 
 11  import gpkavg 
 12  import gpkimgclass 
 13   
 14  # from gpk_voicing import percep_spec 
 15  # from gpk_voicing import voice_misc 
 16  from gpk_voicing import fv_misc as FVM 
 17  # import irregularity 
 18  # sys.path.insert(0, '%s/tick1/bin' % os.environ['MRImodel']) 
 19   
 20  if 'PYLABDISP' in os.environ: 
 21          if os.environ['PYLABDISP'] == 'pylab': 
 22                  import pylab 
 23          else: 
 24                  import g_pylab as pylab 
 25          PLOT = True 
 26  else: 
 27          PLOT = False 
 28   
 29  ROOT = '/tmp/fv_cache' 
 30   
 31   
 32   
 33  # SpecExp = 0.0 
 34   
 35   
 36   
 37   
def average(x):
    """Combine (value, weight) pairs into a single number via gpkavg.avg.

    x is an iterable of 2-item (value, weight) entries.  The values and
    the weights are split into two parallel lists and handed to
    gpkavg.avg() with a third argument of 0.0; the first element of the
    result is returned.
    """
    # NOTE(review): the 0.0 is presumably gpkavg's clipping fraction
    # (no clipping => plain weighted mean) -- confirm against gpkavg.
    pairs = list(x)
    values = [value for (value, _wt) in pairs]
    weights = [wt for (_value, wt) in pairs]
    return gpkavg.avg(values, weights, 0.0)[0]
45
def median(x):
    """Combine (value, weight) pairs via gpkavg.avg with a 0.499 setting.

    x is an iterable of 2-item (value, weight) entries.  Returns the
    first element of gpkavg.avg(values, weights, 0.499).
    """
    # NOTE(review): 0.499 presumably clips almost everything but the
    # centre of the distribution, giving a weighted median -- confirm
    # against gpkavg.
    values, weights = [], []
    for entry in x:
        value, wt = entry
        values.append(value)
        weights.append(wt)
    result = gpkavg.avg(values, weights, 0.499)
    return result[0]
53 54
def weight(x):
    """Return the root-sum-of-squares of the weights in x.

    x is an iterable of 2-item (value, weight) entries; the values are
    ignored.  An empty x yields 0.0.
    """
    # Start from 0.0 so the accumulation is done in floating point.
    squares = (wt**2 for (_value, wt) in x)
    return M.sqrt(sum(squares, 0.0))
60 61
class matmult_xform(object):
    """Feature-vector transform that multiplies by a matrix read from a file.

    Offers the name() / describe_xform() / operate() methods that run()
    calls on whatever object it holds as its Scale.
    """

    # Imported in the class body, so the module is reached as
    # self.chunkio rather than through a module-level name.
    from gmisclib import chunkio

    def __init__(self, filename):
        """Load the transform matrix from the chunkio file `filename`."""
        self._name = filename
        # Close the file deterministically instead of leaking the handle
        # until garbage collection.
        with open(filename, 'r') as fd:
            self.xform = self.chunkio.datachunk(fd).read_NumArray()

    def name(self):
        """Return the file name the matrix was loaded from."""
        return self._name

    def describe_xform(self, descr):
        """Do nothing; present so callers can invoke it unconditionally."""
        pass

    def operate(self, o, descr):
        """Apply the matrix to `o` and relabel the columns in `descr`.

        descr[i] is overwritten with {'id': 'Mixture<i>'} for every
        column index i of `o`.
        """
        # NOTE(review): `o` is passed both as the left operand and as the
        # third argument (presumably an output buffer).  That can only
        # work if the transform is square and the underlying routine
        # tolerates aliased input/output -- confirm against gmisclib.Num.
        Num.matrixmultiply(o, self.xform, o)
        for i in range(o.shape[1]):
            descr[i] = {'id': 'Mixture%d' % i}
78 79 80
def describe_xform(descr, fac):
    """Print a summary of every output channel of a linear transform.

    descr: sequence of per-input-channel dictionaries.  Each must carry
        'type' and 'id'; 'erb' and 'width' are optional.
    fac: 2-D array of shape (n_inputs, n_outputs); len(descr) must equal
        n_inputs.

    For each output channel the column of `fac` is divided by its RMS,
    then two lines are printed: a '# FV[i]= ...' comment listing the
    inputs whose normalized coefficient exceeds 0.1 in magnitude, and
    the result of avio.concoct() on a dictionary of summary statistics.
    """
    n_in, n_out = fac.shape
    assert len(descr) == n_in

    def rss(values):
        # Root-sum-of-squares of a (possibly empty) list of magnitudes.
        return M.sqrt(Num.sum(Num.array(values)**2))

    # Substrings looked for in each input's 'type' entry.
    type_tags = ('edge', 'burst', 'fricative', 'vowel', 'haspitch')

    for chan in range(n_out):
        column = fac[:, chan]
        scaled = column / M.sqrt(Num.average(column**2))
        terms = []
        freq_pairs = []
        width_pairs = []
        by_tag = dict((tag, []) for tag in type_tags)
        for j in range(n_in):
            info = descr[j]
            coef = scaled[j]
            magnitude = abs(coef)
            if 'erb' in info:
                freq_pairs.append((info['erb'], magnitude))
            if 'width' in info:
                width_pairs.append((info['width'], magnitude))
            kinds = info['type']
            for tag in type_tags:
                if tag in kinds:
                    by_tag[tag].append(magnitude)
            if magnitude > 0.1:
                terms.append('%+.2f*%s' % (coef, info['id']))
        description = ' '.join(terms)
        print('# FV[%d]= %s' % (chan, description))
        summary = {
            'voicing': rss(by_tag['haspitch']),
            'edges': rss(by_tag['edge']),
            'burst': rss(by_tag['burst']),
            'fricative': rss(by_tag['fricative']),
            'vowel': rss(by_tag['vowel']),
            'meanfreq': average(freq_pairs),
            'medfreq': median(freq_pairs),
            'wt_freq': weight(freq_pairs),
            'meanwidth': average(width_pairs),
            'medwidth': median(width_pairs),
            'wt_width': weight(width_pairs),
            'channel': chan,
            'description': description,
            'ltype': 'fb',
        }
        print(avio.concoct(summary))
127 128 129 130
def run(argv):
    """Command-line driver: compute feature vectors and write them out.

    argv is a sys.argv-style list: argv[0] is the program name, followed
    by flags and then up to two positional arguments (signal file,
    output file).  Flags handled here:

        -DT / -dt X     output sampling interval in seconds (default 0.01)
        -f FILE         signal file
        -c / -col C     column of the signal file (integer index or name)
        -o FILE         output file (default 'fv.dat')
        -v              increase verbosity
        -cache          pass cache information to the feature-vector code
        -Nsv X, -LF X, -ELF X   numeric parameters
        -sfv, -ssfv, -specfv, -fv20071030, -fv20080325, -fv200807jyuan,
        -fv200811align, -fv200812align, -fv200908opt, -fv200909opt[S],
        -fv201003opt[S], -fv201102[S]
                        select the gpk_voicing implementation module
                        (a trailing 'S' also applies that module's Scale)
        -novoicing      set do_voicing to 0
        -Xform FILE     apply a matmult_xform loaded from FILE
        -write 'a=b;c=d'  extra header entries
        --              end of flags

    The chosen module's feature_vec() is run on the selected column and
    the result is written with gpkimgclass, together with a header that
    records the settings.  Usage errors are reported through
    die.info()/die.exit(1) or die.die().
    """
    Lf = 1.5
    Elf = 1.5
    Nsv = FVM.NSV
    # Nsv and LF updated from the 5 April 2007 optimization
    # in .../m/ASR/OPT
    DT = 0.01       # Seconds.  Default output sampling interval.
    Scale = None
    arglist = argv[1:]
    # Keep an independent copy: arglist is consumed by pop(0) below, and
    # an alias would always leave an empty list to store in hdr['ARGV'].
    arglist0 = list(arglist)
    column = None
    signalfile = None
    outfile = None
    Cache = None
    verbose = 0
    kwargs = {}
    import gpk_voicing.fvcurrent as fvm
    extrahdr = {}
    # NOTE(review): every kwargs['ELF'] = Elf below captures the value
    # Elf has when that flag is parsed, so a later -ELF changes the
    # header entry but not the value passed on -- confirm intended.
    while arglist and arglist[0].startswith('-'):
        arg = arglist.pop(0)
        if arg == '--':
            break
        elif arg == '-DT' or arg == '-dt':
            DT = float(arglist.pop(0))
        elif arg == '-f':
            signalfile = arglist.pop(0)
        elif arg == '-c' or arg == '-col':
            tmp = arglist.pop(0)
            # A column may be given by integer index or by name.
            try:
                column = int(tmp)
            except ValueError:
                column = tmp
            die.note("signalfile", signalfile)
        elif arg == '-o':
            outfile = arglist.pop(0)
        elif arg == '-v':
            verbose += 1
        elif arg == '-cache':
            Cache = True
        elif arg == '-Nsv':
            # Update the local value as well as the module-level one;
            # the local is what gets passed to feature_vec(), so setting
            # only FVM.NSV left the flag without effect on the result.
            Nsv = FVM.NSV = float(arglist.pop(0))
        # elif arg == '-SpecExp':
        #     SpecExp = float(arglist.pop(0))
        elif arg == '-sfv':
            import gpk_voicing.fvsimple as fvm
            kwargs['do_voicing'] = 1
            kwargs['do_dissonance'] = True
            kwargs['ELF'] = Elf
        elif arg == '-ssfv':
            import gpk_voicing.fvss as fvm
            kwargs['do_dissonance'] = False
            kwargs['do_voicing'] = 1
            kwargs['ELF'] = Elf
        elif arg == '-specfv':
            import gpk_voicing.fvss as fvm
            kwargs['do_dissonance'] = False
            kwargs['do_voicing'] = 0
            kwargs['PlompBouman'] = False
            kwargs['ELF'] = Elf
        elif arg == '-fv20071030':
            import gpk_voicing.fv20071030 as fvm
            kwargs['do_dissonance'] = True
            kwargs['do_voicing'] = 1
            kwargs['ELF'] = Elf
        elif arg == '-fv20080325':
            import gpk_voicing.fv20080325 as fvm
            kwargs['do_dissonance'] = True
            kwargs['do_voicing'] = 1
            kwargs['ELF'] = Elf
        elif arg == '-fv200807jyuan':
            import gpk_voicing.fv200807jyuan as fvm
            kwargs['do_voicing'] = 1
            kwargs['do_dissonance'] = True
            kwargs['ELF'] = Elf
        elif arg == '-fv200811align':
            import gpk_voicing.fv200811align as fvm
            kwargs['do_voicing'] = 2
            kwargs['do_dissonance'] = True
            kwargs['ELF'] = Elf
        elif arg == '-fv200812align':
            import gpk_voicing.fv200812align as fvm
            kwargs['do_voicing'] = 2
            kwargs['do_dissonance'] = True
            Lf = 1.0
            Elf = 1.0
            kwargs['ELF'] = Elf
        elif arg == '-fv200908opt':
            import gpk_voicing.fv200908opt as fvm
            kwargs['do_voicing'] = 1
            kwargs['do_dissonance'] = False
            kwargs['ELF'] = Elf
        elif arg.startswith('-fv200909opt'):
            import gpk_voicing.fv200909opt as fvm
            kwargs['do_voicing'] = 1
            kwargs['do_dissonance'] = False
            Nsv, Lf, Elf = fvm.Scale.get_many('nsv', 'lf,lf', 'lf,elf')
            if arg.endswith('S'):
                Scale = fvm.Scale
            kwargs['ELF'] = Elf
        elif arg.startswith('-fv201003opt'):
            import gpk_voicing.fv201003opt as fvm
            kwargs['do_voicing'] = 1
            kwargs['do_dissonance'] = False
            Nsv, Lf, Elf = fvm.Scale.get_many('nsv', 'lf,lf', 'lf,elf')
            if arg.endswith('S'):
                Scale = fvm.Scale
            kwargs['ELF'] = Elf
        elif arg.startswith('-fv201102'):
            import gpk_voicing.fv201102 as fvm
            Nsv, Lf = fvm.Scale.get_many('norm,nsv', 'lf,lf')
            kwargs['tdif'], kwargs['tdif2'] = fvm.Scale.get_many('norm,tdif', 'norm,tdif2')
            kwargs['fdif2'], kwargs['FORMANT_HIGH'] = fvm.Scale.get_many('norm,fdif2', 'FORMANT_HIGH')
            if arg.endswith('S'):
                Scale = fvm.Scale
        elif arg == '-LF':
            Lf = float(arglist.pop(0))
        elif arg == '-ELF':
            Elf = float(arglist.pop(0))
        elif arg == '-novoicing':
            kwargs['do_voicing'] = 0
        elif arg == '-Xform':
            Scale = matmult_xform(arglist.pop(0))
        elif arg == '-write':
            # 'key=value;key=value' -> dictionary of extra header entries.
            extrahdr = dict([q.strip().split('=', 1)
                             for q in arglist.pop(0).split(';')])
        else:
            die.info("Unrecognized flag: %s" % arg)
            print(__doc__)
            die.exit(1)

    # Remaining positional arguments: signal file, then output file.
    if arglist and signalfile is None:
        signalfile = arglist.pop(0)
    if arglist and outfile is None:
        outfile = arglist.pop(0)
    elif outfile is None:
        outfile = 'fv.dat'
    if signalfile is None:
        die.info("No signal file specified.")
        print(__doc__)
        die.exit(1)
    if arglist:
        die.info('Extra arguments!')
        print(__doc__)
        die.exit(1)

    signal = gpkimgclass.read(signalfile)
    if column is None and signal.d.shape[1] == 1:
        # Only one channel, so there is nothing to choose.
        column = 0
    elif column is None:
        die.die("There are %d channels in %s, so you must specify one with '-c'." % (signal.d.shape[1], signalfile))
    # Probe the column once so a bad selection gives a readable message.
    try:
        signal.column(column)
    except KeyError:
        die.info("File has %d columns" % signal.n[1])
        die.die("Bad column: %s" % str(column))

    if Cache is not None:
        kwargs['cache_info'] = CC.cache_info(root=ROOT, file=signalfile, info=(column,))
    o, descr, DTx, tshift = fvm.feature_vec(signal.column(column), signal.dt(), DT,
                                            Nsv=Nsv, LF=Lf, **kwargs)
    o = Num.transpose(o)
    hdr = signal.hdr.copy()
    if Scale is not None:
        Scale.operate(o, descr)
        hdr['XFORM'] = Scale.name()
        if verbose:
            Scale.describe_xform(descr)

    hdr['program'] = argv[0]
    hdr['ARGV'] = arglist0
    hdr['input_file'] = signalfile
    hdr['column'] = column
    hdr['CDELT2'] = DTx
    hdr['CRPIX2'] = 1
    hdr['CRVAL2'] = signal.start() + tshift
    hdr['CDELT1'] = 1
    hdr['BITPIX'] = -32
    hdr['DataSamplingFreq'] = 1.0/signal.dt()
    # Record the value actually handed to feature_vec(); it can differ
    # from FVM.NSV when one of the *opt / -fv201102 flags supplied it.
    hdr['Nsv'] = Nsv
    hdr['LF'] = Lf
    hdr['ELF'] = Elf
    hdr.update(extrahdr)

    if PLOT:
        pylab.matshow(Num.transpose(o))
        pylab.show()
    assert o.shape[1] == len(descr), "Length mismatch data=%s descr=%d" % (
        o.shape, len(descr)
    )
    # One TTYPE / F_INFO / RMS header entry per output column (1-based).
    for (i, d) in enumerate(descr):
        hdr['TTYPE%d' % (i+1)] = d['id']
        hdr['F_INFO%d' % (i+1)] = avio.concoct(d)
        hdr['RMS%d' % (i+1)] = M.sqrt(Num.average(o[:, i]**2))
    gpkimgclass.gpk_img(hdr, o).write(outfile)
if __name__ == '__main__':
    # Script entry point: hand the full command line to run().
    # The psyco speed-up below is deliberately left disabled.
    # try:
    #     import psyco
    #     psyco.full()
    # except ImportError:
    #     pass

    run(sys.argv)