Package gmisclib :: Module segmentfile
[frames] | no frames]

Source Code for Module gmisclib.segmentfile

  1  """This parses a '.in' file from xwaves.   The '.in' files 
  2  show segmentation of an utterance.    segmentfile.parse(s) 
  3  takes a list of lines from such a file, and returns 
  4  three things: 
  5  * the title of the file (typically the utterance) 
  6  * a list of groups (a group is typically a word) 
  7  * a list of segments. 
  8   
  9  Groups include a list of the segments of which they are made, 
 10  along with a name and an index. 
 11  Segments contain a phoneme, a start/end time, 
 12  an index, and the group to which they belong. 
 13  """ 
 14   
15 -def _indented(s):
16 return len(s)>0 and (s[0]=='\t' or s[0]==' ')

# sample .in file:
# ** o cen s
# o
#     o       1.620417
# cen
#     Ccl     1.679583
#     C       1.76925
#     ^       1.8527499
#     N       1.941583
# s
#     s       2.0380001

30 -def _title(s):
31 i = 0 32 while s[i] == '*' : 33 i = i + 1 34 return s[i:].lstrip()
35
class segment:
    """One labelled stretch of time within an utterance.

    Attributes:
        start, end -- start and end of the segment
        phn        -- the label (phoneme) of the segment
        segidx     -- index of the segment
        group      -- the group the segment belongs to, or None until
                      setgroup() has been called
    """

    def __init__(self, st, en, phn, si):
        self.start = st
        self.end = en
        self.phn = phn
        self.segidx = si
        self.group = None

    def setgroup(self, g):
        """Record g as the group that owns this segment."""
        self.group = g

    def __str__(self):
        # A segment has no group until setgroup() is called; printing it
        # in that state must not raise, so show '?' for the group index.
        if self.group is None:
            gidx = '?'
        else:
            gidx = '%d' % self.group.groupidx
        return "<segment [%s]/%s[%d] %g-%g>" % (
            gidx,
            self.phn, self.segidx,
            self.start, self.end)

    def __repr__(self):
        return self.__str__()
55 56 57
class group:
    """A named, indexed collection of segments.

    Attributes:
        s         -- list of the members, in the order they were added
        groupidx  -- index of the group
        groupname -- name of the group
    """

    def __init__(self, gi, gn):
        self.groupname = gn
        self.groupidx = gi
        self.s = []

    def add(self, seg):
        """Append seg to the members of this group."""
        self.s.append(seg)

    def __str__(self):
        members = ', '.join([str(member) for member in self.s])
        return "<group %s[%d] %s >" % (self.groupname, self.groupidx, members)

    def __repr__(self):
        return str(self)
73 74
def parse(l):
    """Parse the lines of an ESPS Xmark (.in) file.

    l is an iterable of lines.  Returns the tuple
    (title, list of groups, list of segments).

    Line handling:
      * Trailing whitespace is stripped; blank lines are ignored.
      * The first unindented line starting with '*' supplies the title.
      * Every other unindented line starts a new group named by that line.
      * An indented line with exactly two whitespace-separated fields is
        a label and a time; indented lines with any other number of
        fields are skipped.

    Each segment runs from the previous time to the current one and takes
    the current line's label, so the first timed line only sets the
    starting time and produces no segment.  Segments that appear before
    any group header are attached to an unnamed group that is not part of
    the returned group list.
    """
    segments = []
    groups = []
    title = None
    prev_time = None
    current = group(0, '')
    seg_index = -1

    for raw in l:
        text = raw.rstrip()
        if not text:
            continue

        if _indented(text):
            fields = text.split()
            if len(fields) != 2:
                continue
            label, stamp = fields
            when = float(stamp)
            if prev_time is not None:
                seg = segment(prev_time, when, label, seg_index)
                seg.setgroup(current)
                current.add(seg)
                segments.append(seg)
            prev_time = when
            seg_index += 1
        elif title is None and text[0] == '*':
            title = _title(text)
        else:
            # The index of a new group equals the number of groups so far.
            current = group(len(groups), text)
            groups.append(current)

    return (title, groups, segments)
108 109
def read(f):
    """Read an ESPS Xmark (.in) file and return information.  See parse().

    f is the path of the file to open.  The result is whatever parse()
    returns for the lines of that file.
    """
    # Text mode: parse() compares characters against str literals, which
    # never match the bytes objects a binary-mode file yields on Python 3.
    # The with-block closes the file even when parse() raises.
    with open(f, "r") as ii:
        return parse(ii.readlines())
116 117 118
def test():
    """Self-test: parse a small sample file and check the result."""
    sample = [
        '** o cen s',
        'o',
        ' o 1.620417',
        'cen',
        ' Ccl 1.679583',
        ' C 1.76925',
        ' ^ 1.8527499',
        ' N 1.941583',
        's ',
        ' s 2.0380001',
        '',
    ]
    title, groups, segs = parse(sample)

    assert title == 'o cen s'

    # The first timed line only sets the start time, so the first
    # segment is 'Ccl', which belongs to the second group.
    first = segs[0]
    assert first.phn == 'Ccl'
    assert first.start == 1.620417
    assert first.end == 1.679583
    assert first.group.groupidx == 1
    assert first.segidx == 0

    head = groups[0]
    assert head.groupidx == 0
    assert head.groupname == 'o'

    # Every segment must point back at the group that lists it.
    for grp in groups:
        for member in grp.s:
            assert member.group == grp

    assert len(segs) == 5
    assert len(groups) == 3


# Run the self-test when the module is executed as a script.
if "__main__" == __name__:
    test()