"""Parse a '.in' file from xwaves.

The '.in' files show the segmentation of an utterance.
segmentfile.parse(s) takes a list of lines from such a file,
and returns three things:
* the title of the file (typically the utterance)
* a list of groups (a group is typically a word)
* a list of segments.

Groups include a list of the segments of which they are made,
along with a name and an index.
Segments contain a phoneme, a start/end time,
an index, and the group to which they belong.
"""
14
16 return len(s)>0 and (s[0]=='\t' or s[0]==' ')
17
18
19
20
21
22
23
24
25
26
27
28
29
31 i = 0
32 while s[i] == '*' :
33 i = i + 1
34 return s[i:].lstrip()
35
class segment:
    """One labelled stretch of time within an utterance.

    Attributes:
      start, end -- start and end time of the segment
      phn        -- the label of the segment (a phoneme)
      segidx     -- index of the segment
      group      -- the group the segment belongs to; None until
                    setgroup() is called
    """

    def __init__(self, st, en, phn, si):
        self.start = st
        self.end = en
        self.phn = phn
        self.segidx = si
        self.group = None

    def setgroup(self, g):
        """Record g as the group to which this segment belongs."""
        # NOTE(review): body reconstructed from how parse() and test()
        # use it (seg.setgroup(curgroup); seg.group.groupidx) -- confirm.
        self.group = g

    def __str__(self):
        # A segment that has not been given a group yet has no group
        # index to show; print '?' instead of failing on None.
        if self.group is None:
            gidx = "?"
        else:
            gidx = "%d" % self.group.groupidx
        return "<segment [%s]/%s[%d] %g-%g>" % (
            gidx,
            self.phn, self.segidx,
            self.start, self.end)
52
55
56
57
class group:
    """A named, indexed collection of segments (typically a word).

    Attributes:
      s         -- list of the members of the group, in the order added
      groupidx  -- index of the group
      groupname -- name of the group
    """

    def __init__(self, gi, gn):
        self.s = []
        self.groupidx = gi
        self.groupname = gn

    def add(self, s):
        """Append s to the members of this group."""
        # NOTE(review): body reconstructed from how parse() and test()
        # use it (curgroup.add(seg); iterating group.s) -- confirm.
        self.s.append(s)

    def __str__(self):
        members = ', '.join(str(member) for member in self.s)
        return "<group %s[%d] %s >" % (self.groupname, self.groupidx, members)
70
73
74
def parse(l):
    """Parses a list of lines from an ESPS Xmark file (.in),
    and returns a tuple (title, list of groups, list of segments).

    Trailing whitespace is stripped from every line; empty lines are
    ignored.  Lines are then handled as follows:

    * The first unindented line starting with '*' gives the title.
    * Any other unindented line starts a new group named by the line.
    * An indented line with exactly two whitespace-separated fields is
      "label time".  Indented lines with any other field count are
      skipped.

    Each timed line after the first yields a segment running from the
    previous line's time to its own time and carrying its own label.
    The first timed line only supplies a start time, so its label does
    not become a segment.

    The title is None if no title line was found.  float() raises
    ValueError if a time field is not a number.
    """
    pseg = []
    pg = []
    title = None
    last_t = None
    groupidx = 0
    # Segments that appear before any group line attach to this
    # placeholder group, which is never added to the returned list.
    curgroup = group(groupidx, '')
    segidx = -1
    for line in l:
        line = line.rstrip()
        if not line:
            continue

        if not _indented(line):
            if title is None and line.startswith('*'):
                title = _title(line)
            else:
                curgroup = group(groupidx, line)
                pg.append(curgroup)
                groupidx += 1
            continue

        ss = line.split()
        if len(ss) != 2:
            continue
        t = float(ss[1])
        if last_t is not None:
            seg = segment(last_t, t, ss[0], segidx)
            seg.setgroup(curgroup)
            curgroup.add(seg)
            pseg.append(seg)

        # Every timed line advances the clock and the index, including
        # the first one, which produced no segment.
        last_t = t
        segidx += 1
    return (title, pg, pseg)
108
109
def read(f):
    """Read a ESPS Xmark (.in) file, and return information. See parse().

    f is the name of the file to open.  The return value is whatever
    parse() returns for the lines of that file.
    """
    # NOTE(review): the name `read` is inferred from the docstring
    # ("Read a ESPS Xmark ...") -- confirm against callers.
    # Text mode, so parse() receives str lines: it compares characters
    # against '*', ' ' and '\t', which bytes lines would not match.
    # The with-block closes the file even if parse() raises.
    with open(f, "r") as infile:
        return parse(infile.readlines())
116
117
118
def test():
    """Self-test: parse a small sample and check the result."""
    teststring = [
        '** o cen s',
        'o',
        ' o 1.620417',
        'cen',
        ' Ccl 1.679583',
        ' C 1.76925',
        ' ^ 1.8527499',
        ' N 1.941583',
        's ',
        ' s 2.0380001',
        '',
    ]
    title, grp, seg = parse(teststring)
    assert title == 'o cen s'
    # The first timed line ('o') only supplies the start time, so the
    # first segment is 'Ccl' and it belongs to the second group.
    assert seg[0].phn == 'Ccl'
    assert seg[0].start == 1.620417
    assert seg[0].end == 1.679583
    assert seg[0].group.groupidx == 1
    assert seg[0].segidx == 0
    assert grp[0].groupidx == 0
    assert grp[0].groupname == 'o'
    # Every segment listed in a group must point back at that group.
    for t in grp:
        for q in t.s:
            assert q.group == t
    assert len(seg) == 5
    assert len(grp) == 3


if __name__ == '__main__':
    test()
148