Package lib :: Module fv200811align

Source Code for Module lib.fv200811align

#!/usr/bin/env python


from gmisclib import erb_scale
from gmisclib import Num
from gpk_voicing import percep_spec
from gpk_voicing import fv_misc as M
SillyWidthException = M.SillyWidthException

def feature_vec(data, dt, DT,
                LF=1.0, Nsv=M.NSV, ELF=1.0,
                do_voicing=1, do_dissonance=True,
                PlompBouman=False, do_pdur=True):
    FORMANT_LOW = erb_scale.f_to_erb(200.0)
    FORMANT_HIGH = erb_scale.f_to_erb(4000.0)
    assert float(DT) > 0.0
    assert float(LF) > 0.0
    bmin = erb_scale.f_to_erb(80.0)
    bmax = erb_scale.f_to_erb(6000.0)
    ectrs, neural, t0 = percep_spec.perceptual_spec(data, dt, DT,
                                                    bmin, bmax, M.DB,
                                                    do_mod=do_voicing,
                                                    do_dissonance=do_dissonance,
                                                    PlompBouman=PlompBouman
                                                    )

    band_indices = [i for (i, ec) in enumerate(ectrs) if ec['type'] == 'band']
    neural_b = neural.take(band_indices, axis=0)
    assert neural_b.shape[1] == neural.shape[1]    # Band channels share the time axis.

    neural_now = Num.average(neural_b, axis=0)    # Average over frequency.
    assert neural_now.shape[0] == neural.shape[1]
    neural_avg = Num.sum(neural_now**2)/Num.sum(neural_now)    # Average over time.
    # neural_avg is a scalar, grand average.
    Num.divide(neural, neural_avg, neural)
    # Now, we've normalized by an over-all average loudness.
    Num.divide(neural_now, neural_avg, neural_now)
    # Now, we've normalized by an over-all average loudness.

    assert neural_b.shape[0] <= neural.shape[1]
    for (i, e) in enumerate(ectrs):
        if e['type'] == 'haspitch':
            Num.divide(neural[i, :], neural_avg, neural[i, :])
        if e['type'] == 'dissonance':
            Num.divide(neural[i, :], neural_avg, neural[i, :])

    # print '# neural_avg=', neural_avg
    o = []
    descr = []
    wset = set()
    for vl in [0.06]:
        # print 'vl=', vl, type(vl), 'LF=', LF, type(LF), 'DT=', DT, type(DT)
        w = int(round(vl*LF/DT))
        if w not in wset:
            tmpo, tmpd = M.vowel(w, ectrs, neural, neural_now, Nsv,
                                 formant_low=FORMANT_LOW,
                                 formant_high=FORMANT_HIGH
                                 )
            o.extend(tmpo)
            descr.extend(tmpd)
            wset.add(w)
    assert len(descr) == len(o), "Descriptor mismatch"

    wset = set()
    for fl in [0.02]:
        w = int(round(fl*ELF/DT))
        if w not in wset:
            tmpo, tmpd = M.fricative(w, ectrs, neural, neural_now, Nsv)
            o.extend(tmpo)
            descr.extend(tmpd)
            wset.add(w)
    assert len(descr) == len(o), "Descriptor mismatch"

    wset = set()
    for fel in [0.04]:
        w = int(round(fel*ELF/DT))
        if w not in wset:
            tmpo, tmpd = M.fricative_edge(w, ectrs, neural, neural_now, Nsv,
                                          do_abs=False
                                          )
            o.extend(tmpo)
            descr.extend(tmpd)
            wset.add(w)
    assert len(descr) == len(o), "Descriptor mismatch"

    wset = set()
    for sel in [0.03]:
        w = int(round(sel*LF/DT))
        if w not in wset:
            tmpo, tmpd = M.spectral_entropy(w, ectrs, neural, neural_now, Nsv)
            o.extend(tmpo)
            descr.extend(tmpd)
            wset.add(w)
    assert len(descr) == len(o), "Descriptor mismatch"

    if do_voicing:
        wset = set()
        for hpl in [0.02]:
            w = int(round(hpl*LF/DT))
            if w not in wset:
                tmpo, tmpd = M.haspitch(w, ectrs, neural, neural_now, Nsv)
                o.extend(tmpo)
                descr.extend(tmpd)
                wset.add(w)
        assert len(descr) == len(o), "Descriptor mismatch"

    if do_dissonance:
        wset = set()
        for dsl in [0.06]:
            w = int(round(dsl*LF/DT))
            if w not in wset:
                tmpo, tmpd = M.dissonance(w, ectrs, neural, neural_now, Nsv)
                o.extend(tmpo)
                descr.extend(tmpd)
                wset.add(w)
        assert len(descr) == len(o), "Descriptor mismatch"

    if do_pdur:
        tmpo, tmpd = M.pdur(ectrs, neural, neural_now, DT)
        o.extend(tmpo)
        descr.extend(tmpd)

    assert len(descr) == len(o), "Descriptor mismatch"
    N = neural[0].shape[0]
    for (i, (tmp, dsc)) in enumerate(zip(o, descr)):
        assert tmp.shape == (N,), "Wrong size: %d, %s = %d vs. %d" % (i, str(dsc), tmp.shape[0], N)

    return (o, descr, DT, t0)
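For orientation, here is a minimal usage sketch. It is not part of the module: the 16 kHz sampling rate, 10 ms frame step, and the synthetic test signal are assumptions for illustration, and numpy stands in for whatever array source the caller has. Only feature_vec, its argument order (data, dt, DT), and its return tuple come from the listing above; the import path follows the module name lib.fv200811align.

# Usage sketch (illustrative; sample rate, frame step, and test data are assumptions).
import numpy
from lib import fv200811align

sample_rate = 16000.0                             # assumed input sampling rate (Hz)
dt = 1.0/sample_rate                              # spacing between waveform samples (s)
DT = 0.01                                         # spacing between output feature frames (s)
data = numpy.random.randn(int(2.0*sample_rate))   # two seconds of noise as stand-in audio

o, descr, DT_out, t0 = fv200811align.feature_vec(data, dt, DT)
# o is a list of 1-D arrays, one per feature, all with the same number of frames;
# descr is the matching list of feature descriptors.
print len(descr), o[0].shape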