
Source Code for Module lib.fv201003opt

#!/usr/bin/env python

"""Feature vectors for optimized DTW (dynamic time warping) alignment.
Exemplar project with Ladan Baghai-Ravary.
"""

import math
import numpy
from gmisclib import die
from gmisclib import cache as CC
from gmisclib import gpkmisc
from gmisclib import erb_scale
# from gpk_voicing import percep_spec as PS
from gpk_voicing import cached_ps as PS
from gpk_voicing import fv_misc as M
from gpk_voicing import zero as Z
SillyWidthException = M.SillyWidthException

DB = 0.95

Opt_text = """
# summarize_logs -uid UID -best -fromstart advdIrL,-fv201003opt_0.av
# len(currentlist)= 190 tail= 0.0 nsamp= 190
# max(advdIrL,-fv201003opt_0.av) = 84.47
#  samples used = 190 filename= advdIrL,-fv201003opt_0.av
84.47 logp
# n= 41
3.02401 Scale,fricative%Sedge:.*:11.9-14.8
14.2481 Scale,fricative%Sedge:.*:15.7-17.6
16.3675 Scale,fricative%Sedge:.*:18.6-23.3
0.925029 Scale,fricative%Sedge:.*:24.3-29.0
6.3387 Scale,fricative%Sedge:.*:4.3-7.2
22.348 Scale,fricative%Sedge:.*:8.1-11.0
0.798545 Scale,haspitch1:.*
3.12358 Scale,vowel:.*:10.0
1.17288 Scale,vowel:.*:11.0
1.01182 Scale,vowel:.*:11.9
2.37575 Scale,vowel:.*:12.9
14.409 Scale,vowel:.*:13.8
1.01614 Scale,vowel:.*:14.8
3.41617 Scale,vowel:.*:15.7
2.49094 Scale,vowel:.*:16.7
7.17409 Scale,vowel:.*:17.6
2.301 Scale,vowel:.*:18.6
3.45347 Scale,vowel:.*:19.5
5.5532 Scale,vowel:.*:20.5
1.77577 Scale,vowel:.*:21.4
0.365305 Scale,vowel:.*:22.4
5.62289 Scale,vowel:.*:23.3
2.07074 Scale,vowel:.*:24.3
0.757549 Scale,vowel:.*:25.2
2.91112 Scale,vowel:.*:26.2
3.47592 Scale,vowel:.*:27.1
1.79999 Scale,vowel:.*:28.1
10.2112 Scale,vowel:.*:29.0
1.37166 Scale,vowel:.*:4.3
1.8372 Scale,vowel:.*:5.3
0.646851 Scale,vowel:.*:6.2
0.480479 Scale,vowel:.*:7.2
12.4389 Scale,vowel:.*:8.1
6.54689 Scale,vowel:.*:9.1
0.0053143 border_shift,beginning
2.98189 dist,alpha
0.619647 dist,beta
1.35488 len_exp
0.468854 lf,elf
0.00879217 lf,lf
0.00784977 nsv
"""

Scale = M.scale_xform(Opt_text,
                      name="2010-06-05 optimization in cayenne:/home/gpk/ItakuraSaitoDistance/IS-DTW/dvdIrL,-fv201003opt_0.av"
                      )

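# ---------------------------------------------------------------------------
# Editor's note (not part of the original module): Opt_text above appears to
# be the output of an optimization run -- a few "#" comment lines followed by
# one "<weight> <descriptor-pattern>" pair per line.  M.scale_xform()
# presumably turns that table into the Scale object used to weight feature
# components; its exact behaviour is not documented here.  The hypothetical
# helper below is only a sketch of how such a two-column weight table could
# be parsed, assuming that simple format.
def _example_parse_weight_table(text):
    """Illustrative sketch only: map descriptor patterns to float weights."""
    weights = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        value, name = line.split(None, 1)
        weights[name] = float(value)
    return weights
# ---------------------------------------------------------------------------
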
def feature_vec_guts(data, dt, DT,
                     LF=1.0, Nsv=M.NSV, ELF=1.0,
                     do_voicing=0, do_dissonance=False,
                     PlompBouman=False, do_pdur=False,
                     zerosub=0.0, do_irx=False, E=None, cache_info=None):
    assert not do_irx, "Do_irx=%s" % do_irx
    assert not do_pdur and not do_dissonance
    FORMANT_LOW = erb_scale.f_to_erb(130.0)
    FORMANT_HIGH = erb_scale.f_to_erb(5000.0)
    assert float(DT) > 0.0
    assert float(LF) > 0.0
    bmin = erb_scale.f_to_erb(100.0)
    bmax = erb_scale.f_to_erb(5500.0)
    ectrs, neural, t0 = PS.perceptual_spec(data, dt, DT,
                                           bmin, bmax, DB,
                                           do_mod=do_voicing,
                                           do_dissonance=do_dissonance,
                                           do_peakalign=False,
                                           PlompBouman=PlompBouman,
                                           e=E,
                                           cache_info=cache_info.copy() if cache_info is not None else None
                                           )

    assert 1 < neural.shape[0] < 200
    # Select the 'band' (filterbank) channels; they drive the loudness
    # normalization below.
    band_indices = [i for (i, ec) in enumerate(ectrs) if ec['type'] == 'band']
    neural_b = neural.take(band_indices, axis=0)
    assert neural_b.shape[1] == neural.shape[1]
    if zerosub != 0.0:
        neural_b = neural_b.copy()
        try:
            z = Z.PS(neural_b)
        except Z.ZeroProblem:
            die.warn("Bad utterance: zero does not converge.")
            z = numpy.zeros((neural_b.shape[0],))
        assert neural_b.shape[0] == z.shape[0]
        numpy.subtract(neural_b, z[:, numpy.newaxis], neural_b)
        assert neural_b.shape[1] == neural.shape[1]
    assert neural_b.shape[0] <= neural.shape[0]

    neural_now = numpy.average(neural_b, axis=0)    # Average over frequency.
    assert neural_now.shape[0] == neural.shape[1]
    neural_avg = numpy.square(neural_now).sum()/numpy.sum(neural_now)    # Average over time.
    # neural_avg is a scalar, grand average.
    numpy.divide(neural, neural_avg, neural)
    # Now, we've normalized by an over-all average loudness.
    numpy.divide(neural_now, neural_avg, neural_now)
    # Now, we've normalized by an over-all average loudness.

    # Extra normalization: haspitch and dissonance channels are divided by
    # neural_avg once more; peakalign channels by its square.
    for (i, e) in enumerate(ectrs):
        if e['type'] == 'haspitch':
            numpy.divide(neural[i, :], neural_avg, neural[i, :])
        if e['type'] == 'dissonance':
            numpy.divide(neural[i, :], neural_avg, neural[i, :])
        if e['type'] == 'peakalign':
            numpy.divide(neural[i, :], numpy.square(neural_avg), neural[i, :])

    o = []
    descr = []
    w = 0.04*LF/DT
    tmpo, tmpd = M.vowel(w, ectrs, neural, neural_now, Nsv,
                         formant_low=FORMANT_LOW,
                         formant_high=FORMANT_HIGH
                         )
    o.extend(tmpo)
    descr.extend(tmpd)
    assert len(descr) == len(o), "Descriptor mismatch"

    w = 0.04*ELF/DT
    tmpo, tmpd = M.fricative_edge(w, ectrs, neural, neural_now, Nsv,
                                  do_abs=False, version=2
                                  )
    o.extend(tmpo)
    descr.extend(tmpd)
    assert len(descr) == len(o), "Descriptor mismatch"

    if do_voicing:
        w = 0.02*math.sqrt(LF)/DT
        tmpo, tmpd = M.haspitch(w, ectrs, neural, neural_now, Nsv)
        o.extend(tmpo)
        descr.extend(tmpd)
        assert len(descr) == len(o), "Descriptor mismatch"

        w = 0.03*ELF/DT
        tmpo, tmpd = M.peakiness(w, ectrs, neural, neural_now, Nsv)
        o.extend(tmpo)
        descr.extend(tmpd)
        assert len(descr) == len(o), "Descriptor mismatch"

    N = neural[0].shape[0]
    for (i, (tmp, dsc)) in enumerate(zip(o, descr)):
        assert tmp.shape == (N,), "Wrong size: %d, %s = %d vs. %d" % (i, str(dsc), tmp.shape[0], N)

    return (o, descr, DT, t0)

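# ---------------------------------------------------------------------------
# Editor's note (not part of the original module): feature_vec_guts() returns
# a list `o` of equal-length 1-D feature time series, a parallel list of
# descriptors, the frame spacing DT, and the start time t0.  The hypothetical
# helper below is a minimal sketch, assuming only numpy, of how those series
# could be stacked into a (n_features, n_frames) matrix for a frame-by-frame
# DTW cost computation.
def _example_stack_features(o, descr):
    """Illustrative sketch only: stack feature time series into one matrix."""
    assert len(o) == len(descr)
    fv = numpy.vstack(o)            # shape = (len(o), n_frames)
    assert fv.shape[0] == len(descr)
    return fv
# ---------------------------------------------------------------------------
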
def feature_vec(data, dt, DT,
                LF=1.0, Nsv=M.NSV, ELF=1.0,
                do_voicing=0, do_dissonance=False,
                PlompBouman=False, do_pdur=False,
                zerosub=0.0, do_irx=False, E=None, cache_info=None):
    o = None
    ci = None
    if cache_info is not None:
        assert isinstance(cache_info, CC.cache_info)
        ci = cache_info.addinfo(dt, DT, LF, Nsv, ELF, do_voicing, do_dissonance, PlompBouman,
                                do_pdur, zerosub, do_irx, E, 'fv201003opt')
        # Try to load a previously cached result.
        try:
            o = ci.load()
        except ci.Errors:
            pass
    if o is None:
        # Not in the cache (or caching disabled): compute and, if possible, store.
        o = feature_vec_guts(data, dt, DT, LF=LF, Nsv=Nsv, ELF=ELF, do_voicing=do_voicing,
                             do_dissonance=do_dissonance, PlompBouman=PlompBouman,
                             do_pdur=do_pdur, zerosub=zerosub, do_irx=do_irx, E=E,
                             cache_info=cache_info
                             )
        if ci is not None:
            ci.dump(o)
    return o
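
A minimal usage sketch (editor's addition, not taken from the original module). It assumes that data is a one-dimensional numpy array of audio samples, that dt is the sampling interval in seconds, that DT is the desired output frame spacing in seconds, and that the module is importable as lib.fv201003opt as the page title suggests; all of these are inferences from the call signatures above, not documented behaviour.

import numpy
from lib import fv201003opt

rate = 16000.0                                # assumed sample rate
data = numpy.random.randn(int(rate))          # one second of stand-in audio
o, descr, DT, t0 = fv201003opt.feature_vec(data, 1.0/rate, 0.01, do_voicing=1)
# o is a list of equal-length feature time series; descr labels each one.
fv = numpy.vstack(o)                          # (n_features, n_frames) matrix for DTW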