Package gmisclib :: Module MLF_file
[frames | no frames]

Source Code for Module gmisclib.MLF_file

class BadFormatError(RuntimeError):
    """Raised when the contents of a MLF file cannot be parsed."""

    def __init__(self, *args):
        # All positional arguments are handed to RuntimeError unchanged,
        # so they end up in C{self.args}.
        super(BadFormatError, self).__init__(*args)
4 5
class NotInMLFFile(KeyError):
    """Raised when a requested block is not present in a MLF file."""

    def __init__(self, *args):
        # All positional arguments are handed to KeyError unchanged,
        # so they end up in C{self.args}.
        super(NotInMLFFile, self).__init__(*args)
9 10
class block_MLF_file(object):
    """This class reads in and stores a MLF file.  It does not interpret the interior data,
    but rather just breaks it up into blocks, each corresponding to an utterance.

    The layout accepted by the parser is::

        #!MLF!#
        "name of first block"
        ...lines of the block...
        .
        "name of second block"
        ...

    Trailing whitespace is stripped from every line.  A block that is not
    terminated by a line containing only C{.} before the end of the file
    is not stored.
    """

    #: Factor that converts the first two fields of a label line into seconds
    #: (see L{get3}); i.e. those fields are counted in units of 1e-7 s.
    Quantum = 1e-7

    def __init__(self, fname, preprocessor=None):
        """Read in a MLF file and store the information in C{self.block}.

        @param fname: Filename to read
        @type fname: str
        @param preprocessor: A function to project the name of each block onto
                something that you want to use as an index of blocks.
                Typically, this function cleans up the names, removing asterisks
                and such.  If C{None}, block names are used unchanged.
                If two block names project onto the same index, the later
                block replaces the earlier one.
        @type preprocessor: function str -> str
        @raise BadFormatError: if the first line is not C{#!MLF!#} or if a
                line that should hold a block name is not enclosed in
                double quotes.
        """
        if preprocessor is None:
            preprocessor = lambda x: x

        #: self.block is where all the data is kept.  This is a dictionary
        #: mapping from a file pattern (name, roughly) to a block of label
        #: information.  The block is a list of strings, one per line in the MLF file.
        #: The file pattern is the output of C{preprocessor}.
        #: dict(str: list(str))
        self.block = {}

        self.fname = fname      #: The name of the MLF file
        self.blockname = {}     #: Mapping from a file pattern to the name of the block.

        # The context manager guarantees the file is closed, even when
        # parsing fails part-way through.
        with open(fname, 'r') as fd:
            # Strip the header the same way every other line is stripped, so
            # that CRLF line ends or a missing final newline are tolerated.
            if fd.readline().rstrip() != '#!MLF!#':
                raise BadFormatError("Missing #!MLF!# header", fname)

            block = []
            inblock = False
            fpattern = None
            blockname = None
            # The header was line 1, so the body starts at line 2.
            for (lineno, line) in enumerate(fd, 2):
                line = line.rstrip()
                if not inblock:
                    # Outside a block the only legal line is a quoted block name.
                    # This is checked explicitly (not with assert) so that it
                    # still happens under "python -O" and so that an empty line
                    # is reported as a format error.
                    if len(line) < 2 or line[0] != '"' or line[-1] != '"':
                        raise BadFormatError(
                            "Expected a quoted block name at %s : %d" % (fname, lineno)
                            )
                    blockname = line[1:-1]
                    fpattern = preprocessor(blockname)
                    inblock = True
                elif line == '.':
                    # A lone period terminates the current block.
                    inblock = False
                    self.block[fpattern] = block
                    self.blockname[fpattern] = blockname
                    block = []
                else:   # in the block
                    block.append(line)

    def get(self, key):
        """Get a block of text from a MLF file.

        @param key: the index of the block, i.e. the output of the
                C{preprocessor} that was given to the constructor.
        @return: the lines of the block.  This is the list stored in
                C{self.block}, not a copy.
        @rtype: list(str)
        @raise NotInMLFFile: if there is no block stored under C{key}.
        """
        try:
            return self.block[key]
        except KeyError:
            raise NotInMLFFile(key, self.fname)

    def get3(self, key, n=3):
        """Get a block of time-aligned labels from a MLF file and interpret it.

        Lines with fewer than three whitespace-separated fields are skipped.
        The first two fields are converted to C{float} and multiplied by
        C{Quantum}; a fourth field, if present, is converted to C{float}
        (this is checked even if C{n} is too small for it to be returned).

        @param key: the index of the block (see L{get}).
        @param n: the maximum number of leading fields to keep from each line.
                The default keeps C{(start, end, label)}.
        @type n: int
        @return: C{(start, end, label, ...)} tuples, with C{start} and C{end} in seconds,
                C{label} is a string indicating a phoneme or word or whatever.
                If there is more information on a line, it will be passed along in the tuple
                (up to C{n} fields in total).  The list is sorted.
        @rtype: list(tuple(start, end, label, ...), ...)
        @raise NotInMLFFile: if there is no block stored under C{key}.
        @raise BadFormatError: if a numeric field cannot be converted.
        """
        labels = []
        for (i, line) in enumerate(self.get(key)):
            a = line.split()
            if len(a) < 3:
                continue
            try:
                a[0] = float(a[0]) * self.Quantum
                a[1] = float(a[1]) * self.Quantum
                if len(a) > 3:
                    a[3] = float(a[3])
            except ValueError:
                # i counts from zero; report a one-based line number within the block.
                raise BadFormatError(
                    "Cannot parse line %s : %s : %d" % (self.fname, key, i+1)
                    )
            labels.append(tuple(a[:n]))
        labels.sort()
        return labels
95