Package lib :: Module fv200912opt
[frames | no frames]

Source Code for Module lib.fv200912opt

  1  #!/usr/bin/env python 
  2   
  3  """Feature vectors for optimized DTW alignment. 
  4  Exemplar project with Ladan Baghai-Ravary. 
  5  """ 
  6   
  7  import math 
  8  import numpy 
  9  from gmisclib import die 
 10  from gmisclib import erb_scale 
 11  from gpk_voicing import percep_spec 
 12  from gpk_voicing import fv_misc as M 
 13  from gpk_voicing import zero as Z 
 14  SillyWidthException = M.SillyWidthException 
 15   
 16  DB = 0.85 
 17   
 18  # Scale = M.scale_xform(Opt_text, 
 19                          # name="299010 optimization in /proj/exemplar/ItakuraSaitoDistance/IS-DTW/log_adv,,-fv200909opt_0.av" 
 20                          # ) 
 21   
 22   
 23   
def feature_vec(data, dt, DT,
                LF=1.0, Nsv=M.NSV, ELF=1.0,
                do_voicing=1, do_dissonance=False,
                PlompBouman=False, do_pdur=False, zerosub=0.0):
    """Compute the feature vectors used for optimized DTW alignment.

    A perceptual spectrum is obtained from C{percep_spec.perceptual_spec},
    normalized in place by an overall loudness estimate, and then passed to
    the C{fv_misc} extractors (vowel edge, fricative, optionally haspitch,
    and peakiness).  Their outputs are concatenated.

    @param data: input signal; forwarded unchanged to
        C{percep_spec.perceptual_spec}.
    @param dt: forwarded unchanged to C{percep_spec.perceptual_spec}
        (presumably the sampling interval of C{data} -- confirm).
    @param DT: must be positive.  Forwarded to
        C{percep_spec.perceptual_spec} and used as the divisor of every
        extractor width (presumably the output frame interval -- confirm).
    @param LF: must be positive.  Scales the vowel-edge width linearly and
        the haspitch width by its square root.
    @param Nsv: forwarded unchanged to every C{fv_misc} extractor.
    @param ELF: scales the fricative and peakiness widths.
    @param do_voicing: forwarded as C{do_mod} to
        C{percep_spec.perceptual_spec}; when true, haspitch features are
        also extracted.
    @param do_dissonance: must be false (asserted).
    @param PlompBouman: forwarded unchanged to
        C{percep_spec.perceptual_spec}.
    @param do_pdur: must be false (asserted).
    @param zerosub: when non-zero, the result of C{Z.percep_spec_zero} is
        subtracted from the band channels before the loudness track is
        computed.  Only zero vs. non-zero matters; the value is not used
        as a scale factor.
    @return: C{(o, descr, DT, t0)}: the list of feature arrays, the list
        of matching descriptors, C{DT} as passed in, and C{t0} as returned
        by C{percep_spec.perceptual_spec}.
    """
    assert not do_pdur and not do_dissonance
    formant_lo = erb_scale.f_to_erb(120.0)
    formant_hi = erb_scale.f_to_erb(5000.0)
    assert float(DT)>0.0
    assert float(LF)>0.0
    erb_lo = erb_scale.f_to_erb(80.0)
    erb_hi = erb_scale.f_to_erb(6000.0)
    ectrs, neural, t0 = percep_spec.perceptual_spec(
        data, dt, DT, erb_lo, erb_hi, DB,
        do_mod=do_voicing,
        do_dissonance=do_dissonance,
        do_peakalign=True,
        PlompBouman=PlompBouman
    )

    assert 1 < neural.shape[0] < 200

    # Pick out the rows of the spectrum that are plain frequency bands.
    band_rows = []
    for (row, chan) in enumerate(ectrs):
        if chan['type']=='band':
            band_rows.append(row)
    bands = neural.take(band_rows, axis=0)
    assert bands.shape[1]==neural.shape[1]

    if zerosub != 0.0:
        bands = bands.copy()
        try:
            zero_level = Z.percep_spec_zero(bands)
        except Z.ZeroProblem:
            # Fall back to "no correction" rather than giving up.
            die.warn("Bad utterance: zero does not converge.")
            zero_level = numpy.zeros((bands.shape[0],))
        assert bands.shape[0]==zero_level.shape[0]
        # One offset per band, broadcast along the second axis, in place.
        numpy.subtract(bands, zero_level[:,numpy.newaxis], bands)
    assert bands.shape[1]==neural.shape[1]
    assert bands.shape[0]<=neural.shape[0]

    # Loudness track: band channels averaged over frequency.
    loudness = numpy.average(bands, axis=0)
    assert loudness.shape[0] == neural.shape[1]
    # Scalar grand average over time, weighted by the loudness itself.
    grand_avg = numpy.sum(loudness**2)/numpy.sum(loudness)

    # Normalize the whole spectrum and the loudness track, both in place.
    numpy.divide(neural, grand_avg, neural)
    numpy.divide(loudness, grand_avg, loudness)

    # Some channel types get a further in-place division on top of the
    # global one above: once more for haspitch and dissonance, and by the
    # square for peakalign.
    for (row, chan) in enumerate(ectrs):
        kind = chan['type']
        if kind == 'haspitch' or kind == 'dissonance':
            numpy.divide(neural[row,:], grand_avg, neural[row,:])
        elif kind == 'peakalign':
            numpy.divide(neural[row,:], grand_avg**2, neural[row,:])

    o = []
    descr = []

    def _collect(extracted):
        """Append one extractor's (vectors, descriptors) pair to the output."""
        vectors, labels = extracted
        o.extend(vectors)
        descr.extend(labels)
        assert len(descr)==len(o), "Descriptor mismatch"

    _collect(M.vowel_edge(0.04*LF/DT, ectrs, neural, loudness, Nsv,
                          do_abs=False,
                          formant_low=formant_lo,
                          formant_high=formant_hi
                          ))

    _collect(M.fricative(0.03*ELF/DT, ectrs, neural, loudness, Nsv))

    if do_voicing:
        _collect(M.haspitch(0.02*math.sqrt(LF)/DT, ectrs, neural, loudness, Nsv))

    # NOTE(review): peakiness is taken to be unconditional, since
    # do_peakalign=True is always requested above -- confirm against the
    # original indentation.
    _collect(M.peakiness(0.03*ELF/DT, ectrs, neural, loudness, Nsv))

    # Every feature must have the same length as the first row of the spectrum.
    N = neural[0].shape[0]
    for (i, (vec, dsc)) in enumerate(zip(o, descr)):
        assert vec.shape == (N,), "Wrong size: %d, %s = %d vs. %d" % (i, str(dsc), vec.shape[0], N)

    return (o, descr, DT, t0)
113