1
2
3 """Feature vectors for optimized DTW alignment.
4 Exemplar project with Ladan Baghai-Ravary.
5 """
6
7 import math
8 import numpy
9 from gmisclib import die
10 from gmisclib import cache as CC
11 from gmisclib import gpkmisc
12 from gmisclib import erb_scale
13
14 from gpk_voicing import cached_ps as PS
15 from gpk_voicing import fv_misc as M
16 from gpk_voicing import zero as Z
# Re-export so callers can catch this exception from this module directly.
SillyWidthException = M.SillyWidthException

# Spectral smearing / masking parameter handed to PS.perceptual_spec below.
# NOTE(review): exact semantics defined in gpk_voicing.cached_ps — confirm there.
DB = 0.95

# Output of a numerical optimization run (see header lines below for provenance).
# Each non-comment line is "<weight> <feature-name-pattern>"; the whole text is
# parsed by M.scale_xform into per-feature scale factors.  This is runtime data:
# do not edit the values or names.
Opt_text = """
# summarize_logs -uid UID -best -fromstart advdIrL,-fv201003opt_0.av
# len(currentlist)= 190 tail= 0.0 nsamp= 190
# max(advdIrL,-fv201003opt_0.av) = 84.47
# samples used = 190 filename= advdIrL,-fv201003opt_0.av
84.47 logp
# n= 41
3.02401 Scale,fricative%Sedge:.*:11.9-14.8
14.2481 Scale,fricative%Sedge:.*:15.7-17.6
16.3675 Scale,fricative%Sedge:.*:18.6-23.3
0.925029 Scale,fricative%Sedge:.*:24.3-29.0
6.3387 Scale,fricative%Sedge:.*:4.3-7.2
22.348 Scale,fricative%Sedge:.*:8.1-11.0
0.798545 Scale,haspitch1:.*
3.12358 Scale,vowel:.*:10.0
1.17288 Scale,vowel:.*:11.0
1.01182 Scale,vowel:.*:11.9
2.37575 Scale,vowel:.*:12.9
14.409 Scale,vowel:.*:13.8
1.01614 Scale,vowel:.*:14.8
3.41617 Scale,vowel:.*:15.7
2.49094 Scale,vowel:.*:16.7
7.17409 Scale,vowel:.*:17.6
2.301 Scale,vowel:.*:18.6
3.45347 Scale,vowel:.*:19.5
5.5532 Scale,vowel:.*:20.5
1.77577 Scale,vowel:.*:21.4
0.365305 Scale,vowel:.*:22.4
5.62289 Scale,vowel:.*:23.3
2.07074 Scale,vowel:.*:24.3
0.757549 Scale,vowel:.*:25.2
2.91112 Scale,vowel:.*:26.2
3.47592 Scale,vowel:.*:27.1
1.79999 Scale,vowel:.*:28.1
10.2112 Scale,vowel:.*:29.0
1.37166 Scale,vowel:.*:4.3
1.8372 Scale,vowel:.*:5.3
0.646851 Scale,vowel:.*:6.2
0.480479 Scale,vowel:.*:7.2
12.4389 Scale,vowel:.*:8.1
6.54689 Scale,vowel:.*:9.1
0.0053143 border_shift,beginning
2.98189 dist,alpha
0.619647 dist,beta
1.35488 len_exp
0.468854 lf,elf
0.00879217 lf,lf
0.00784977 nsv
"""

# Callable/object that maps feature descriptors to the optimized weights above.
Scale = M.scale_xform(Opt_text,
			name="2010-06-05 optimization in cayenne:/home/gpk/ItakuraSaitoDistance/IS-DTW/dvdIrL,-fv201003opt_0.av"
			)
74
75
def feature_vec_guts(data, dt, DT,
			LF=1.0, Nsv=M.NSV, ELF=1.0,
			do_voicing=0, do_dissonance=False,
			PlompBouman=False, do_pdur=False,
			zerosub=0.0, do_irx=False, E=None, cache_info=None):
	"""Compute the feature vectors used for DTW alignment of one utterance.

	data -- audio samples; dt -- sampling interval of data (seconds,
	presumably -- TODO confirm); DT -- output frame step.
	LF, ELF -- scale factors on the smoothing widths of the vowel /
	haspitch and edge / peakiness features respectively.
	Nsv -- number of singular vectors (passed through to fv_misc helpers).
	do_voicing -- if true, also compute haspitch and peakiness features.
	do_dissonance, do_pdur, do_irx -- accepted for interface compatibility
	only; this implementation asserts they are false.
	zerosub -- if nonzero, estimate and subtract a per-band zero level.
	E, cache_info -- passed through to PS.perceptual_spec.

	Returns (o, descr, DT, t0): a list of 1-D per-frame feature arrays,
	the matching list of descriptors, the frame step, and the start time
	reported by the perceptual-spectrum computation.
	"""
	# These modes are not supported by this feature set.
	assert not do_irx, "Do_irx=%s" % do_irx
	assert not do_pdur and not do_dissonance
	# Frequency range (on the ERB scale) searched for formant-like features.
	FORMANT_LOW = erb_scale.f_to_erb(130.0)
	FORMANT_HIGH = erb_scale.f_to_erb(5000.0)
	assert float(DT)>0.0
	assert float(LF)>0.0
	# Analysis band edges for the perceptual spectrum, on the ERB scale.
	bmin = erb_scale.f_to_erb(100.0)
	bmax = erb_scale.f_to_erb(5500.0)
	# ectrs: per-channel descriptors (dicts with a 'type' key);
	# neural: 2-D array, one row per channel, one column per frame;
	# t0: time of the first frame.
	ectrs, neural, t0 = PS.perceptual_spec(data, dt, DT,
						bmin, bmax, DB,
						do_mod=do_voicing,
						do_dissonance=do_dissonance,
						do_peakalign=False,
						PlompBouman=PlompBouman,
						e=E,
						cache_info = cache_info.copy() if cache_info is not None else None
						)

	# Sanity limit on the number of channels.
	assert 1 < neural.shape[0] < 200
	# Select only the plain filter-bank ('band') channels.
	band_indices = [i for (i,ec) in enumerate(ectrs) if ec['type']=='band']
	neural_b = neural.take(band_indices, axis=0)
	assert neural_b.shape[1]==neural.shape[1]
	if zerosub != 0.0:
		# NOTE(review): zerosub acts only as an on/off flag here -- the
		# estimated zero level z is subtracted at full strength regardless
		# of zerosub's magnitude.  Confirm that is intended.
		neural_b = neural_b.copy()
		try:
			z = Z.PS(neural_b)
		except Z.ZeroProblem:
			# Best-effort: fall back to no zero correction.
			die.warn("Bad utterance: zero does not converge.")
			z = numpy.zeros((neural_b.shape[0],))
		assert neural_b.shape[0]==z.shape[0]
		# In-place subtraction of the per-band zero level from every frame.
		numpy.subtract(neural_b, z[:,numpy.newaxis], neural_b)
		assert neural_b.shape[1]==neural.shape[1]
		assert neural_b.shape[0]<=neural.shape[0]

	# Per-frame average over the band channels.
	neural_now = numpy.average(neural_b, axis=0)
	assert neural_now.shape[0] == neural.shape[1]
	# Single overall normalization constant: sum of squares over plain sum,
	# i.e. an energy-weighted average level of the utterance.
	neural_avg = numpy.square(neural_now).sum()/numpy.sum(neural_now)

	# Normalize the whole channel array in place...
	numpy.divide(neural, neural_avg, neural)
	# ...and the per-frame average to match.
	numpy.divide(neural_now, neural_avg, neural_now)


	# Some channel types get an extra division beyond the global one above,
	# so their net normalization is 1/neural_avg**2 (haspitch, dissonance)
	# or 1/neural_avg**3 (peakalign).  NOTE(review): presumably because
	# those channels scale quadratically/cubically with signal level --
	# confirm against cached_ps.
	for (i,e) in enumerate(ectrs):
		if e['type'] == 'haspitch':
			numpy.divide(neural[i,:], neural_avg, neural[i,:])
		if e['type'] == 'dissonance':
			numpy.divide(neural[i,:], neural_avg, neural[i,:])
		if e['type'] == 'peakalign':
			numpy.divide(neural[i,:], numpy.square(neural_avg), neural[i,:])

	# Accumulate feature arrays (o) and their descriptors (descr) in parallel.
	o = []
	descr = []
	# w: smoothing width in frames (a time constant in seconds divided by
	# the frame step DT, scaled by the tunable LF/ELF factors).
	w = 0.04*LF/DT
	tmpo, tmpd = M.vowel(w, ectrs, neural, neural_now, Nsv,
				formant_low=FORMANT_LOW,
				formant_high=FORMANT_HIGH
				)
	o.extend(tmpo)
	descr.extend(tmpd)
	assert len(descr)==len(o), "Descriptor mismatch"

	w = 0.04*ELF/DT
	tmpo, tmpd = M.fricative_edge(w, ectrs, neural, neural_now, Nsv,
					do_abs=False, version=2
					)
	o.extend(tmpo)
	descr.extend(tmpd)
	assert len(descr)==len(o), "Descriptor mismatch"

	if do_voicing:
		w = 0.02*math.sqrt(LF)/DT
		tmpo, tmpd = M.haspitch(w, ectrs, neural, neural_now, Nsv)
		o.extend(tmpo)
		descr.extend(tmpd)
		assert len(descr)==len(o), "Descriptor mismatch"

		w = 0.03*ELF/DT
		tmpo, tmpd = M.peakiness(w, ectrs, neural, neural_now, Nsv)
		o.extend(tmpo)
		descr.extend(tmpd)
		assert len(descr)==len(o), "Descriptor mismatch"

	# Every feature array must have one value per frame.
	N = neural[0].shape[0]
	for (i, (tmp, dsc)) in enumerate(zip(o, descr)):
		assert tmp.shape == (N,), "Wrong size: %d, %s = %d vs. %d" % (i, str(dsc), tmp.shape[0], N)

	return (o, descr, DT, t0)
169
170
def feature_vec(data, dt, DT,
			LF=1.0, Nsv=M.NSV, ELF=1.0,
			do_voicing=0, do_dissonance=False,
			PlompBouman=False, do_pdur=False,
			zerosub=0.0, do_irx=False, E=None, cache_info=None):
	"""Caching front end for feature_vec_guts().

	When cache_info is supplied, a cache key is derived from every
	parameter that affects the result (plus the tag 'fv201003opt');
	a previously stored result is returned if one can be loaded,
	otherwise the features are computed and stored under that key.
	See feature_vec_guts() for the meaning of the parameters and the
	shape of the returned (o, descr, DT, t0) tuple.
	"""
	ci = None
	result = None
	if cache_info is not None:
		assert isinstance(cache_info, CC.cache_info)
		ci = cache_info.addinfo(dt, DT, LF, Nsv, ELF, do_voicing, do_dissonance, PlompBouman,
					do_pdur, zerosub, do_irx, E, 'fv201003opt')
		# Best effort: any cache-read failure just forces a recompute.
		try:
			result = ci.load()
		except ci.Errors:
			result = None
	if result is None:
		result = feature_vec_guts(data, dt, DT, LF=LF, Nsv=Nsv, ELF=ELF, do_voicing=do_voicing,
						do_dissonance=do_dissonance, PlompBouman=PlompBouman,
						do_pdur=do_pdur, zerosub=zerosub, do_irx=do_irx, E=E,
						cache_info=cache_info
						)
		if ci is not None:
			ci.dump(result)
	return result
195