1 """When used as a script, this reads label files produced by wavesurfer
2 and prints the result.
3
4 Also contains functions that can be used for reading and writing the
5 label format preferred by Wavesurfer.
6 """
7 import re
8 import sys
9 from gmisclib.xwaves_errs import *
10
# Matches header lines of the form "# name = value".
# Group 1 is the attribute name, group 2 the value (not yet stripped).
_avpat = re.compile(r'#\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*)\s*$')

# Header key under which free-form comment lines are collected.
_COMTAG = '_COMMENT'


def _is_float(s):
    """Return True if float() accepts the string s, False otherwise."""
    try:
        float(s)
    except ValueError:
        return False
    return True


def read(filename, loose=0):
    """Read in label (transcription) files produced by wavesurfer.

    Lines starting with '#' are header lines: "# name = value" is stored
    as hdr[name] = value, a bare '#' is skipped, and anything else is
    collected as a comment under the '_COMMENT' key (joined with newlines).
    Every other line must be "<start> <end> <label>".  A line with only
    two numbers is accepted as an empty label if it ends with a space
    or tab.

    Note that leading or trailing spaces in the label are removed.

    Segments must be non-decreasing in both start and end time, and
    no segment may end before it starts.

    @param filename: name of label file or '-' to mean L{sys.stdin}
    @type filename: str
    @param loose: how many minor deviations from the ideal format are allowed.
        Each tolerated blank line, missing label, or unparseable line
        uses up one unit.
    @type loose: int
    @return: (header, data).
        Data = [(starttime, endtime, label), ...].
        Header is a dict of the attributes found in the file plus
        '_COMMENT', '_NAME', '_FILETYPE', 'NAXIS', 'NAXIS1', 'NAXIS2',
        'TTYPE1'..'TTYPE3', 'TUNIT1' and 'TUNIT2'.
    @raise NoSuchFileError: if the file cannot be opened.
    @raise BadFileFormatError: if a line cannot be parsed and no
        looseness is left.
    @raise DataOutOfOrderError: if the segment times are not ordered.
    """
    HUGE = 1e30
    hdr = {}
    if filename == '-':
        fd = sys.stdin
    else:
        try:
            fd = open(filename, "r")
        except IOError as x:
            raise NoSuchFileError(x)

    t0last = -HUGE
    t1last = -HUGE
    d = []
    comments = []
    try:
        # iter(readline, '') keeps the line-at-a-time reads of a plain
        # readline loop; n is the 1-based line number for error messages.
        for n, raw in enumerate(iter(fd.readline, ''), 1):
            # Keep trailing blanks for now: they mark an intentionally
            # empty label on a two-field line.
            l = raw.lstrip().rstrip('\r\n')
            if not l:
                if loose > 0:
                    loose -= 1
                    continue
                raise BadFileFormatError('blank lines prohibited: %s:%d' % (filename, n))

            if l.startswith('#'):
                if l == '#':
                    # A bare '#' carries no information.
                    continue
                m = _avpat.match(l)
                if m:
                    hdr[m.group(1)] = m.group(2).strip()
                else:
                    comments.append(l[1:].strip())
                continue

            a = l.split(None, 2)
            # Test convertibility, not truthiness: a time of 0.0 is valid.
            if len(a) == 2 and _is_float(a[0]) and _is_float(a[1]):
                if l[-1] in (' ', '\t'):
                    a.append('')
                elif loose > 0:
                    a.append('')
                    loose -= 1
                else:
                    raise BadFileFormatError('label must not be empty: %s:%d' % (filename, n))

            if len(a) != 3:
                if loose > 0:
                    loose -= 1
                    continue
                raise BadFileFormatError("Cannot parse line %s:%d" % (filename, n))

            try:
                t0 = float(a[0])
                t1 = float(a[1])
            except ValueError:
                raise BadFileFormatError('Need <float> <float> before <label> %s:%d' % (filename, n))

            if not (t0 >= t0last and t1 >= t1last and t1 >= t0):
                raise DataOutOfOrderError('%s:%d' % (filename, n))
            # split() already dropped leading blanks; strip() removes the
            # trailing ones so the label matches the documented contract.
            d.append((t0, t1, a[2].strip()))
            t0last = t0
            t1last = t1
    finally:
        # Close what we opened, even on error; never close the caller's stdin.
        if fd is not sys.stdin:
            fd.close()

    hdr[_COMTAG] = '\n'.join(comments)
    hdr['_NAME'] = filename
    hdr['_FILETYPE'] = 'wavesurfer'
    hdr['NAXIS'] = 2
    hdr['NAXIS2'] = len(d)
    hdr['NAXIS1'] = 3
    hdr['TTYPE1'] = 'start time'
    hdr['TUNIT1'] = 's'
    hdr['TTYPE2'] = 'end time'
    hdr['TUNIT2'] = 's'
    hdr['TTYPE3'] = 'label'

    return (hdr, d)
106
107
108 from xwaves_lab import end_marks, start_stop
109
110
def write(fd, header, data):
    """Write label information to a file.

    The output starts with a '#wavesurfer_label_gpk' line, followed by
    the comment lines stored under the '_COMMENT' header key (if any),
    then every other header item as "# name = value" in sorted key order,
    then one "start end label" line per segment with times printed to
    five decimal places and the label stripped of surrounding blanks.

    All segments are checked before anything is written, so a failed
    call does not leave a partial file behind.

    Note: Expects data in [(t0, t1, label), ...] form.
    @param fd: where to write
    @type fd: file or file-like object
    @param header: header information.
    @type header: dict
    @param data: a listing of the segments to write to the file.
        Start times and end times must each be non-decreasing, and no
        segment may end before it starts.
    @type data: [(segment_start_time, segment_end_time, segment_label), ...]
    @raise DataOutOfOrderError: if the segments are not ordered as
        described above.
    """
    HUGE = 1e30

    # Materialise once so that iterators/generators are accepted and the
    # ordering can be validated before any output is produced.
    segments = list(data)
    t0last = -HUGE
    t1last = -HUGE
    for (i, (t0, t1, mark)) in enumerate(segments):
        if t1 < t0 or t0 < t0last or t1 < t1last:
            raise DataOutOfOrderError('data[%d]: (%s, %s, %s)' % (i, t0, t1, mark))
        # Remember the previous segment; without this the check above
        # would only ever compare against -HUGE.
        t0last = t0
        t1last = t1

    fd.write('#wavesurfer_label_gpk\n')
    if _COMTAG in header:
        for x in header[_COMTAG].split('\n'):
            fd.write('# %s\n' % x)
    for (a, v) in sorted(header.items()):
        # Comments were already written above in their own format.
        if a != _COMTAG:
            fd.write('# %s = %s\n' % (a, v))
    fd.flush()

    for (t0, t1, mark) in segments:
        fd.write('%.5f %.5f %s\n' % (t0, t1, str(mark).strip()))
    fd.flush()
142
143
if __name__ == '__main__':
    # Script mode: read the label file named on the command line
    # ('-' means standard input) and print one segment per line.
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: %s labelfile\n' % sys.argv[0])
        sys.exit(1)
    hdr, data = read(sys.argv[1])
    for (t0, t1, lbl) in data:
        # Single-string print() gives the same output under Python 2's
        # print statement semantics and under Python 3.
        print('%s %s %s' % (t0, t1, lbl))
148