
Source Code for Module lib.fv200909opt

  1  #!/usr/bin/env python 
  2   
  3  """Feature vectors for optimized DTW alignment. 
  4  Exemplar project with Ladan Baghai-Ravary. 
  5  """ 
  6   
  7  import math 
  8  import numpy 
  9  from gmisclib import die 
 10  from gmisclib import erb_scale 
 11  # from gpk_voicing import percep_spec as PS 
 12  from gpk_voicing import cached_ps as PS 
 13  from gpk_voicing import fv_misc as M 
 14  from gpk_voicing import zero as Z 
 15  SillyWidthException = M.SillyWidthException 
 16   
 17  DB = 0.85 
 18   
 19  Opt_text="""# Trigger="run_to_bottom finished" 
 20  # Trigger match 
 21  # len(currentlist)= 198 tail= 0.0 nsamp= 198 
 22  # max(log_adv,,-fv200909opt_0.av) = 43.27 
 23  #  samples used = 198 filename= log_adv,,-fv200909opt_0.av 
 24  43.27 logp 
 25  # n= 44 
 26  1.73693 dist,alpha 
 27  2.13363 dist,beta 
 28  1.08861 len_exp 
 29  1.55219 lf,elf 
 30  0.154333 lf,lf 
 31  0.140556 nsv 
 32  3.53085 scale,fricative%Sedge:.*:13.0-18.8 
 33   2.8547 scale,fricative%Sedge:.*:18.8-23.0 
 34  3.30231 scale,fricative%Sedge:.*:2.5-13.0 
 35  -3.87654 scale,fricative%Sedge:.*:23.0-27.1 
 36  -0.0238129 scale,fricative%Sedge:.*:27.1-30.7 
 37  -2.76415 scale,haspitch1:.* 
 38  -3.13785 scale,peakalign1:.* 
 39  -2.65753 scale,vowel:.*:10.4 
 40  2.32077 scale,vowel:.*:11.3 
 41  -1.12553 scale,vowel:.*:12.1 
 42  0.0529135 scale,vowel:.*:13.0 
 43  -0.568936 scale,vowel:.*:13.8 
 44  0.622019 scale,vowel:.*:14.7 
 45  0.18557 scale,vowel:.*:15.5 
 46  1.24299 scale,vowel:.*:16.4 
 47  -3.96151 scale,vowel:.*:17.2 
 48  0.793534 scale,vowel:.*:18.1 
 49  0.0448047 scale,vowel:.*:18.9 
 50  1.33993 scale,vowel:.*:19.8 
 51  2.00955 scale,vowel:.*:20.6 
 52  -0.858686 scale,vowel:.*:21.5 
 53  3.48095 scale,vowel:.*:22.3 
 54  -3.47638 scale,vowel:.*:23.2 
 55  -0.966758 scale,vowel:.*:24.0 
 56  -1.98064 scale,vowel:.*:24.9 
 57  2.39958 scale,vowel:.*:25.7 
 58  0.406809 scale,vowel:.*:26.6 
 59  2.68029 scale,vowel:.*:27.4 
 60  -2.71892 scale,vowel:.*:28.3 
 61  0.715675 scale,vowel:.*:29.1 
 62   1.1363 scale,vowel:.*:30.0 
 63  -3.78759 scale,vowel:.*:4.5 
 64  1.14752 scale,vowel:.*:5.3 
 65  -0.108174 scale,vowel:.*:6.2 
 66  -1.18802 scale,vowel:.*:7.0 
 67  -0.0958434 scale,vowel:.*:7.9 
 68  0.643672 scale,vowel:.*:8.7 
 69  2.52746 scale,vowel:.*:9.6 
 70  """ 
 71  Scale = M.scale_xform(Opt_text, 
 72                          name="299010 optimization in /proj/exemplar/ItakuraSaitoDistance/IS-DTW/log_adv,,-fv200909opt_0.av" 
 73                          ) 
 74   
 75  def _irx(a): 
 76      return max(1, int(round(a))) 
 77   
 78   
 79  def feature_vec(data, dt, DT, 
 80                  LF=1.0, Nsv=M.NSV, ELF=1.0, 
 81                  do_voicing=1, do_dissonance=False, 
 82                  PlompBouman=False, do_pdur=False, 
 83                  zerosub=0.0, do_irx=True, cachename=None): 
 84      assert not do_pdur and not do_dissonance 
 85      FORMANT_LOW = erb_scale.f_to_erb(120.0) 
 86      FORMANT_HIGH = erb_scale.f_to_erb(6000.0) 
 87      assert float(DT)>0.0 
 88      assert float(LF)>0.0 
 89      bmin = erb_scale.f_to_erb(80.0) 
 90      bmax = erb_scale.f_to_erb(6000.0) 
 91      ectrs, neural, t0 = PS.perceptual_spec(data, dt, DT, 
 92                                             bmin, bmax, DB, 
 93                                             do_mod=do_voicing, 
 94                                             do_dissonance=do_dissonance, 
 95                                             do_peakalign=True, 
 96                                             PlompBouman=PlompBouman, 
 97                                             cachename=cachename 
 98                                             ) 
 99   
100      assert 1 < neural.shape[0] < 200 
101      band_indices = [i for (i,ec) in enumerate(ectrs) if ec['type']=='band'] 
102      neural_b = neural.take(band_indices, axis=0) 
103      assert neural_b.shape[1]==neural.shape[1] 
104      if zerosub != 0.0: 
105          neural_b = neural_b.copy() 
106          try: 
107              z = Z.PS(neural_b) 
108          except Z.ZeroProblem: 
109              die.warn("Bad utterance: zero does not converge.") 
110              z = numpy.zeros((neural_b.shape[0],)) 
111          assert neural_b.shape[0]==z.shape[0] 
112          numpy.subtract(neural_b, z[:,numpy.newaxis], neural_b) 
113          assert neural_b.shape[1]==neural.shape[1] 
114      assert neural_b.shape[0]<=neural.shape[0] 
115   
116      neural_now = numpy.average(neural_b, axis=0)    # Average over frequency. 
117      assert neural_now.shape[0] == neural.shape[1] 
118      neural_avg = numpy.sum(neural_now**2)/numpy.sum(neural_now)    # Average over time. 
119      # neural_avg is a scalar, grand average. 
120      numpy.divide(neural, neural_avg, neural) 
121      # Now, we've normalized by an over-all average loudness. 
122      numpy.divide(neural_now, neural_avg, neural_now) 
123      # Now, we've normalized by an over-all average loudness. 
124   
125      for (i,e) in enumerate(ectrs): 
126          if e['type'] == 'haspitch': 
127              numpy.divide(neural[i,:], neural_avg, neural[i,:]) 
128          if e['type'] == 'dissonance': 
129              numpy.divide(neural[i,:], neural_avg, neural[i,:]) 
130          if e['type'] == 'peakalign': 
131              numpy.divide(neural[i,:], neural_avg**2, neural[i,:]) 
132   
133      if do_irx: 
134          irx = _irx 
135      else: 
136          irx = lambda x: x 
137   
138   
139      o = [] 
140      descr = [] 
141      w = irx(0.04*LF/DT) 
142      tmpo, tmpd = M.vowel(w, ectrs, neural, neural_now, Nsv, 
143                           formant_low=FORMANT_LOW, 
144                           formant_high=FORMANT_HIGH 
145                           ) 
146      o.extend(tmpo) 
147      descr.extend(tmpd) 
148      assert len(descr)==len(o), "Descriptor mismatch" 
149   
150      w = irx(0.04*ELF/DT) 
151      tmpo, tmpd = M.fricative_edge(w, ectrs, neural, neural_now, Nsv, 
152                                    do_abs=False 
153                                    ) 
154      o.extend(tmpo) 
155      descr.extend(tmpd) 
156      assert len(descr)==len(o), "Descriptor mismatch" 
157   
158      if do_voicing: 
159          w = irx(0.02*math.sqrt(LF)/DT) 
160          tmpo, tmpd = M.haspitch(w, ectrs, neural, neural_now, Nsv) 
161          o.extend(tmpo) 
162          descr.extend(tmpd) 
163          assert len(descr)==len(o), "Descriptor mismatch" 
164   
165      w = irx(0.03*ELF/DT) 
166      tmpo, tmpd = M.peakiness(w, ectrs, neural, neural_now, Nsv) 
167      o.extend(tmpo) 
168      descr.extend(tmpd) 
169      assert len(descr)==len(o), "Descriptor mismatch" 
170   
171      N = neural[0].shape[0] 
172      for (i, (tmp, dsc)) in enumerate(zip(o, descr)): 
173          assert tmp.shape == (N,), "Wrong size: %d, %s = %d vs. %d" % (i, str(dsc), tmp.shape[0], N) 
174   
175      return (o, descr, DT, t0) 
176   
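
For orientation, the sketch below shows how feature_vec() might be called. It is not part of the module: the import path, the 16 kHz sampling rate, the 10 ms frame step, and the white-noise stand-in for real audio are all assumptions made for illustration. Only the argument order (data, dt, DT) and the (o, descr, DT, t0) return value are taken from the source above.

import numpy

# Import path is an assumption; it depends on how the package is installed
# (the page above names the module lib.fv200909opt).
from gpk_voicing import fv200909opt

FS = 16000.0                                           # assumed sampling rate, Hz
dt = 1.0 / FS                                          # input sample period, s
DT = 0.01                                              # assumed output frame period, s
signal = numpy.random.normal(0.0, 1.0, int(2 * FS))    # 2 s of noise as dummy audio

# o is a list of 1-D feature arrays, one per channel, all the same length;
# descr is the matching list of feature descriptors; t0 is the time of the
# first output frame.
o, descr, DT_out, t0 = fv200909opt.feature_vec(signal, dt, DT)
print(len(o), "feature channels,", o[0].shape[0], "frames, t0 =", t0)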