Package gmisclib :: Module MFCCFile
[frames | no frames]

Source Code for Module gmisclib.MFCCFile

  1  import struct 
  2  import sys 
  3   
  4  import numpy 
  5   
  6   
  7  # Class for loading mfccfiles in the C_SDK format. Supports byteswapping. 
  8  # Questions: Tor Andre Myrvoll, myrvoll@research.bell-labs.com 
  9   
class MFCCFile:
    """Load an MFCC file in LASR format and index its vectors.

    File layout, as read by this class (every field is 4 bytes wide):

    1. unsigned int: number of sentences (``numSent``)
    2. unsigned int: feature vector dimension (``vecSize``)
    3. ``numSent`` unsigned ints: number of vectors in each sentence
    4. the vectors themselves, back to back, as 32-bit floats

    Attributes set by the constructor:

    - ``DEBUG``, ``BYTEORDER``, ``FileName``: the constructor arguments.
    - ``numSent``, ``vecSize``: the two header fields.
    - ``sentLength``: list with the number of vectors of each sentence.
    - ``sentList``: list (one entry per sentence) of lists of 1-D
      ``float32`` numpy arrays in native byte order.
    """

    # Maps each accepted BYTEORDER value onto a prefix that makes struct use
    # *standard* sizes.  Native mode ('@' or no prefix) is deliberately
    # avoided: there 'L' is 8 bytes on LP64 platforms, while the file stores
    # 4-byte fields.
    _ORDER_PREFIX = {'': '=', '@': '=', '=': '=', '<': '<', '>': '>', '!': '>'}
    _INT_SIZE = 4    # bytes per header / sentence-length field
    _FLOAT_SIZE = 4  # bytes per vector component

    def __init__(self, filename, DEBUG=0, BYTEORDER='@'):
        """Read the whole file ``filename`` into memory.

        Parameters:
            filename:  path of the MFCC file to load.
            DEBUG:     if true, progress messages are written to stderr.
            BYTEORDER: struct-style byte order character describing the
                       file: '@' or '=' (native), '<' (little-endian),
                       '>' or '!' (big-endian).

        Raises:
            ValueError:   unsupported BYTEORDER, or the file holds fewer
                          vector bytes than its header announces.
            struct.error: the header or sentence-length table is truncated.
            IOError/OSError: the file cannot be opened or read.
        """
        self.DEBUG = DEBUG
        self.BYTEORDER = BYTEORDER
        self.FileName = filename

        try:
            order = self._ORDER_PREFIX[BYTEORDER]
        except (KeyError, TypeError):
            raise ValueError("Unsupported BYTEORDER %r" % (BYTEORDER,))

        # Binary mode is essential: text mode decodes or translates the bytes.
        # The 'with' block closes the file even when parsing fails.
        with open(filename, 'rb') as fp:
            if DEBUG:
                sys.stderr.write("Opening file %s\n" % (filename))

            # Header: number of sentences, then the feature vector dimension.
            header = fp.read(2 * self._INT_SIZE)
            self.numSent, self.vecSize = struct.unpack(order + 'II', header)
            if DEBUG:
                sys.stderr.write(" Number of sentences: %d\n" % (self.numSent))
                sys.stderr.write(" Vector size: %d\n" % (self.vecSize))

            # Table of sentence lengths, one field per sentence.
            raw_lengths = fp.read(self.numSent * self._INT_SIZE)
            self.sentLength = list(
                struct.unpack('%s%dI' % (order, self.numSent), raw_lengths))
            if DEBUG:
                sys.stderr.write("\n")

            # Everything that follows is vector data.
            payload = fp.read()

        frame_bytes = self.vecSize * self._FLOAT_SIZE
        needed = sum(self.sentLength) * frame_bytes
        if len(payload) < needed:
            # Fail loudly instead of handing back short or empty vectors.
            raise ValueError(
                "%s is truncated: header announces %d bytes of vector data "
                "but only %d are present" % (filename, needed, len(payload)))

        # Interpret the bytes in the file's byte order; astype() then yields
        # an independent, writable copy in native order, so no explicit
        # byteswap step is needed.
        file_dtype = numpy.dtype(order + 'f4')
        native_dtype = numpy.dtype(numpy.float32)

        self.sentList = []
        offset = 0
        if DEBUG:
            sys.stderr.write(" Loading sentences:")
        for sent, nframes in enumerate(self.sentLength):
            if DEBUG:
                sys.stderr.write(" %d" % (sent))
            sentdata = []
            for _ in range(nframes):
                raw_vec = numpy.frombuffer(payload, dtype=file_dtype,
                                           count=self.vecSize, offset=offset)
                sentdata.append(raw_vec.astype(native_dtype))
                offset += frame_bytes
            self.sentList.append(sentdata)

        if DEBUG:
            sys.stderr.write("\n")

    def Save(self, filename):
        """Write the data to ``filename`` in native byte order.

        The output uses the layout described on the class (4-byte fields),
        regardless of the BYTEORDER the data was loaded with, so it can be
        re-read with the default ``BYTEORDER``.  Vectors are written as
        32-bit floats.
        """
        with open(filename, 'wb') as fh:
            # Header; '=' selects native byte order with standard sizes.
            fh.write(struct.pack('=II', self.numSent, self.vecSize))

            # Array of sentence lengths.
            fh.write(struct.pack('=%dI' % len(self.sentLength),
                                 *self.sentLength))

            # Finally, the cepstral vectors themselves.
            for utt in self.sentList:
                for vec in utt:
                    fh.write(numpy.asarray(vec, dtype=numpy.float32).tobytes())

    def ReturnSent(self, sentnum):
        """Return the list of vectors making up sentence ``sentnum``."""
        return self.sentList[sentnum]

    def ReturnVector(self, sentnum, vecnum):
        """Return vector ``vecnum`` of sentence ``sentnum``."""
        return self.sentList[sentnum][vecnum]