1 """This module is a python interface to the gpkio library.
2 That library can read in a variety of 2-dimensional image
3 types and multichannel audio data.
4
5 When used with a current version of the libgpkio library,
6 * it reads and writes in NASA FITS files that contain 2-D images
7 (these are used for astronomical data, typically
8 images from telescopes),
9 * it reads HTK HParm format files
10 (these are used for speech data and feature vectors
11 for the HTK Hidden Markov Model Toolkit,
12 for speech recognition systems),
13 * it reads and writes the NIST SPHERE speech data format
14 (though not all variants),
15 * it reads a header + data format called GPK ASCII Image
16 that I frequently use. (This format can
17 give you a completely ASCII data file, or have
18 a human-readable ASCII header followed by binary
19 data. It's designed for 2-D images and adapts well
20 to multichannel audio data.)
21 * it reads and writes the obsolete J. A. Tyson data format called FOCAS,
22 which is a binary format for 2-D astronomical images,
23 * it reads and writes an old AT&T speech data format called SIG
24 (used for speech data),
25 * it reads an old AT&T speech data format called ATTSSW
26 (used for speech data),
27 * it reads a data format called "EST" from the "Edinburgh
28 Speech Tools".
29
30 The code is designed to share an ontology, so that you can
31 read in one format and write the data in another format,
32 without the user needing to transform the header or data.
33
34 The code is designed to support data formats that have a
35 header that can store key=value strings, followed by a
36 1- or 2-dimensional data array.
37
38 Some of the data formats have fewer restrictions than FITS
39 as to the content of file headers: longer keys or attributes
40 may be allowed, and/or a wider variety of characters.
41 The module does its best not to lose header information,
42 but sometimes it must be re-keyed or truncated.
43 """
44
45 import types
46 from gmisclib import Num
47 import gpkimg
48
49
51 """Thrown by read()."""
54
55
57 """Thown by column()."""
60
62 """Finds all the column definitions in the data file,
63 that is, attributes in the form 'TTYPE#'. These
64 attributes tell you what kind of data is in a specific
65 column. It then builds an output dictionary that
66 tells you what column contains any desired type of data."""
67
68 PREFIX = 'TTYPE'
69 Pl = len(PREFIX)
70 o = {}
71 for k in range(nc):
72 o[k] = k
73 sk = '%d' % k
74 o[sk] = k
75 for k in d.keys():
76 assert isinstance(k, str), 'Key must be string'
77
78 if k.startswith(PREFIX):
79 col = int(k[Pl:])
80 if col>0 and col<=nc:
81 o[d[k]] = col-1
82 return o
83
84
85
86
88 if d.has_key(k):
89 del d[k]
90 return 1
91 return 0
92
93
95 """Reads in a data file (f is the name) and returns a
96 class gpk_img object. File names may end in .Z or .gz
97 if the data is compressed.
98 Raises CannotReadDataFile if it does not recognize the file
99 format.
100 """
101 try:
102 hdr, data = gpkimg.read(f)
103 except IOError, x:
104 raise CannotReadDataFile(x.args[0])
105
106 hdr['_NAME'] = f
107 return gpk_img(hdr, data)
108
109
110 CacheSize = 10*1024*1024
111 _csize = 0
112 _cache = {}
114 """Reads in a data file (f is the name) and returns a
115 class gpk_img object. File names may end in .Z or .gz
116 if the data is compressed.
117 Raises CannotReadDataFile if it does not recognize the file
118 format.
119 """
120 global _csize, _cache, CacheSize
121 try:
122 return _cache[f].copy()
123 except KeyError:
124 pass
125 while _csize >= CacheSize and len(_cache):
126 x = _cache.pop()
127 _csize -= x.n[1]*x.n[2]
128 if len(_cache) == 0 or _csize < 0:
129 _csize = 0
130 tmp = read(f)
131 _cache[f] = tmp
132 _csize += tmp.n[1]*tmp.n[2]
133 return tmp.copy()
134
135
136
138 """This class describes a data file. It contains header
139 information and data. The header information follows
140 the NASA FITS standard closely with a few extensions.
141
142 Header items:
143 _NAME -- the filename from which the data came,
144 _FILETYPE -- a string indicating the format of the data file
145
146 Each instance has two attributes that are intended for general use:
147 hdr -- a dictionary containing all the header information, and
148 d -- a 2-dimensional numpy array containing the data.
149 """
150
152 """Creates an gpk_img object from
153 hdr (a dictionary that describes a data array)
154 and the data (a Numeric 2-dimensional array).
155 """
156
157 self.hdr = hdr.copy()
158 try:
159 self.d = Num.asarray(data, Num.Float)
160 except ValueError,x:
161 raise ValueError, '%s (you gave it %s)' % (str(x), str(type(data)))
162
163 if len(self.d.shape) == 1:
164
165 self.d = Num.transpose([self.d])
166 elif len(self.d.shape) == 2:
167 if(self.hdr.has_key('NAXIS')):
168 assert self.hdr['NAXIS'] in ['2', 2]
169 del self.hdr['NAXIS']
170 else:
171 raise ValueError, 'Needs 1 or 2 dimensional arrays'
172
173
174
175 self.n = (None, self.d.shape[1], self.d.shape[0])
176
177
178 self.cdelt = (None, float(hdr.get('CDELT1', 'NaN')),
179 float(hdr.get('CDELT2', 'NaN')))
180
181
182 self.crpix = (None, float(hdr.get('CRPIX1', 'NaN')),
183 float(hdr.get('CRPIX2', 'NaN')))
184
185
186 self.crval = (None, float(hdr.get('CRVAL1', 'NaN')),
187 float(hdr.get('CRVAL2', 'NaN')))
188
189
190 self.types = _pull_types(self.hdr, self.n[1])
191
192
194 return gpk_img(self.hdr, Num.array(self.d, Num.Float, copy=True))
195
196 - def add_history(self, h):
197 """Adds an item to the file's history."""
198 if 'HISTORY' in self.hdr:
199 self.hdr['HISTORY'] += '\n%s' % h.strip()
200 else:
201 self.hdr['HISTORY'] = h.strip()
202
203 - def get_history(self):
204 """Returns the file's history as a list of strings."""
205 return self.hdr.get('HISTORY', '').split('\n')
206
208 """Returns True if there is no data."""
209 return self.d.shape[0]<=0 or self.d.shape[1]<=0 or self.n[1]<=0 or self.n[2]<=0
210
212 """What is the i^th index if the i^th coordinate = t?
213 More plainly, this function gives you the index that
214 corresponds to the coordinate, and 'i' selects which index.
215 (i=1 or 2).
216 """
217 assert self.n[i]>0
218 assert self.cdelt[i] != 0
219 tmp = (self.crpix[i]-1) + (t-self.crval[i])/self.cdelt[i]
220 if hasattr(t, 'shape'):
221 if Num.sometrue(Num.less(tmp, 0.0)) and Num.sometrue(Num.greater(tmp,self.n[i]-1)):
222 raise IndexError, "coordinate out of range"
223 elif tmp<0 or tmp>=self.n[i]-1:
224 raise IndexError, "coordinate out of range: %g vs. (%g, %g)" % (t, self.coord(0, i), self.coord(self.n[i]-1,i))
225 return tmp
226
227 - def coord(self, index, i):
228 """What is the i^th coordinate if the i^th index==index?
229 More plainly, this function gives you the coordinate that
230 corresponds to the index, and 'i' selects which index.
231 If index=None, it returns an array of all the coordinates.
232 (i=1 or 2).
233 """
234 assert self.n[i]>0
235 if index is None:
236 index = Num.arrayrange(self.n[i])
237 return self.crval[i] + (index-(self.crpix[i]-1))*self.cdelt[i]
238
240 """Returns the y-index for a particular x-coordinate."""
241 return self.index(x, 1)
242
244 """Returns the y-index for a particular y-coordinate.
245 Also used for t-index, which returns the index
246 for a particular time, if the data is a time-series.
247 """
248 return self.index(y, 2)
249
250 - def x(self, index = None):
251 """Returns the x-coordinate for a particular x-index."""
252 return self.coord(index, 1)
253
254 - def y(self, index = None):
255 """Returns the y-coordinate for a particular y-index."""
256 return self.coord(index, 2)
257
258
259
261 """Write out a data file, with the information in this class."""
262 d = self.hdr.copy()
263 sh = self.d.shape
264
265 if len(sh) == 2:
266 data = self.d
267 else:
268 raise RuntimeError, '2 dimensional arrays only'
269
270 nc = self.d.shape[1]
271 for i in (1,2):
272 si = str(i)
273 d['NAXIS' + si] = str(sh[i-1])
274 d['CDELT' + si] = str(self.cdelt[i])
275 d['CRPIX' + si] = str(self.crpix[i])
276 d['CRVAL' + si] = str(self.crval[i])
277 for k in d.keys():
278 if k.startswith('TTYPE'):
279 cs = k[len('TTYPE'):]
280 col = int(cs)
281 if col<=0 or col>nc:
282 self._wipe(col)
283 avpairs = [ (x[0], str(x[1])) for x in d.items() ]
284
285 gpkimg.write(f, avpairs, data)
286
287
289 """This is here to make it possible to access the class
290 as a (header,data) tuple.
291 """
292 return 2
293
294
296 """This is here to make it possible to access the class
297 as a (header,data) tuple.
298 """
299 return (self.hdr, self.d)[key]
300
301
302
303
304 - def time(self, index = None):
305 """What time corresponds to a certain index?
306 (This is the same as self.y(index).)"""
307 return self.coord(index, 2)
308
309 t_index = y_index
310
311
313 """Starting time of the data."""
314 return self.time(0)
315
317 """Ending time of the data."""
318 return self.time(self.n[2]-1)
319
320
322 """Assuming time flows down a column (axis=2), what's the time interval?"""
323 return self.cdelt[2]
324
325
327 """Return the requested column of data.
328 (This is the actual data, not a copy.)
329 If s is a string, it returns the column which has TTYPEx=s.
330 Otherwise, if s is an integer, it returns that column
331 (zero-based index).
332 As a special case, if there is only one column,
333 and the function is called with s=None (the default),
334 it will return the data.
335 """
336
337 if s is None and self.d.shape[1]==1:
338 return self.d[:,0]
339
340 try:
341 ic = self.types[s]
342 except KeyError:
343 if '_NAME' in self.hdr:
344 xargs = 'file=%s; column=%s' % (
345 self.hdr['_NAME'], str(s))
346 else:
347 xargs = 'column=%s' % str(s)
348 raise NoSuchColumn, xargs
349 return self.d[:,ic]
350
351
353 a = '%s%d' % (prefix, index+1)
354 if value is not None:
355 self.hdr[a] = value
356 else:
357 _del(self.hdr, a)
358
359
361 nd = 0
362 for x in ('TTYPE', 'TUNIT', 'TRANGE'):
363 nd += _del(self.hdr, '%s%d' % (x, col+1))
364 return nd
365
366
367 - def register(self, ttype, column, trange=None, tunit=None):
368 """This sets up the TTYPE fields so that
369 the specified datatype lives in the specified
370 column. Column is zero-based.
371 You can erase by specifying ttype=None.
372 """
373
374 assert isinstance(ttype, types.StringType)
375 assert isinstance(column, types.IntType)
376
377
378
379 if self.hdr.has_key('TTYPE%d' % (column+1)):
380 oldtype = self.hdr['TTYPE%d' % (column+1)]
381 _del(self.types, oldtype)
382
383 self._wipe(column)
384
385
386 if ttype is not None:
387
388 self.types[ttype] = column
389
390
391 self._idxhdrset('TTYPE', column, ttype)
392 self._idxhdrset('TUNIT', column, tunit)
393 self._idxhdrset('TRANGE', column, trange)
394 self.types[column] = column
395
396
397 - def attach_col(self, data, ttype, trange=None, tunit=None, extra_hdr={}):
398 """Attach another column to an existing class.
399 This modifies the object it is called upon.
400 The data may or may not be copied in the process.
401 """
402 assert len(data.shape) == 1
403 assert ttype is not None
404 assert self.d is None or data.shape[0] == self.d.shape[0]
405 self.hdr.update(extra_hdr)
406 if self.d is None:
407 self.d = Num.array([data])
408 c = 0
409 elif ttype in self.types:
410 c = self.types[ttype]
411 self.d[:,c] = Num.asarray(data, Num.Float)
412 else:
413 self.d = Num.concatenate([self.d, Num.transpose([data])], axis=1)
414 c = self.d.shape[1]-1
415 self.register(ttype, c, trange=trange, tunit=tunit)
416 self.n = (None, self.d.shape[1], self.d.shape[0])
417
418
420 """Select a set of columns in an existing class,
421 returning a new gpk_img class containing a subset of
422 the data. The data is copied; the new and old
423 objects share nothing.
424 """
425 newdata = []
426 for c in columns:
427 newdata.append( self.column(c) )
428 n = gpk_img(self.hdr, Num.transpose(newdata))
429 n.hdr.update( extra_hdr )
430 nic = 0
431 for c in columns:
432 ic = self.types[c]+1
433
434 n.register(self.hdr.get('TTYPE%d' % ic, None), nic,
435 self.hdr.get('TRANGE%d' % ic, None),
436 self.hdr.get('TUNIT%d' % ic, None))
437 nic += 1
438
439 while n._wipe(nic):
440 nic += 1
441
442 return n
443