Module gpkimgclass
[frames] | no frames]

Source Code for Module gpkimgclass

  1  """This module is a python interface to the gpkio library. 
  2  That library can read in a variety of 2-dimensional image 
  3  types and multichannel audio data. 
  4   
  5  When used with a current version of the libgpkio library, 
  6  * it reads and writes in NASA FITS files that contain 2-D images 
  7          (these are used for astronomical data, typically 
  8          images from telescopes), 
  9  * it reads HTK HParm format files 
 10          (these are used for speech data and feature vectors 
 11          for the HTK Hidden Markov Model Toolkit, 
 12          for speech recognition systems), 
 13  * it reads and writes the NIST SPHERE speech data format 
 14          (though not all variants), 
 15  * it reads a header + data format called GPK ASCII Image 
 16          that I frequently use.   (This format can 
 17          give you a completely ASCII data file, or have 
 18          a human-readable ASCII header followed by binary 
 19          data.  It's designed for 2-D images and adapts well 
 20          to multichannel audio data.) 
 21  * it reads and writes the obsolete J. A. Tyson data format called FOCAS, 
 22          which is a binary format for 2-D astronomical images, 
 23  * it reads and writes an old AT&T speech data format called SIG 
 24          (used for speech data), 
 25  * it reads an old AT&T speech data format called ATTSSW 
 26          (used for speech data), 
 27  * it reads a data format called "EST" from the "Edinburgh 
 28          Speech Tools". 
 29   
 30  The code is designed to share an ontology, so that you can 
 31  read in one format and write the data in another format, 
 32  without the user needing to transform the header or data. 
 33   
 34  The code is designed to support data formats that have a 
 35  header that can store key=value strings, followed by a 
 36  1- or 2-dimensional data array. 
 37   
 38  Some of the data formats have fewer restrictions than FITS 
 39  as to the content of file headers: longer keys or attributes 
 40  may be allowed, and/or a wider variety of characters. 
 41  The module does its best not to lose header information, 
 42  but sometimes it must be re-keyed or truncated. 
 43  """ 
 44   
 45  import types 
 46  from gmisclib import Num 
 47  import gpkimg 
 48   
 49   
50 -class CannotReadDataFile(IOError):
51 """Thrown by read()."""
52 - def __init__(self, *x):
53 IOError.__init__(self, x)
54 55
56 -class NoSuchColumn(KeyError):
57 """Thown by column()."""
58 - def __init__(self, *d):
59 KeyError.__init__(self, d)
60
61 -def _pull_types(d, nc):
62 """Finds all the column definitions in the data file, 63 that is, attributes in the form 'TTYPE#'. These 64 attributes tell you what kind of data is in a specific 65 column. It then builds an output dictionary that 66 tells you what column contains any desired type of data.""" 67 68 PREFIX = 'TTYPE' 69 Pl = len(PREFIX) 70 o = {} 71 for k in range(nc): 72 o[k] = k # Columns can also be indexed by integer numbers. 73 sk = '%d' % k 74 o[sk] = k 75 for k in d.keys(): 76 assert isinstance(k, str), 'Key must be string' 77 # print "PULLTYPES(", k, ")", 'type(k)=', type(k) 78 if k.startswith(PREFIX): 79 col = int(k[Pl:]) 80 if col>0 and col<=nc: 81 o[d[k]] = col-1 82 return o
83 84 85 86
87 -def _del(d, k):
88 if d.has_key(k): 89 del d[k] 90 return 1 91 return 0
92 93
94 -def read(f):
95 """Reads in a data file (f is the name) and returns a 96 class gpk_img object. File names may end in .Z or .gz 97 if the data is compressed. 98 Raises CannotReadDataFile if it does not recognize the file 99 format. 100 """ 101 try: 102 hdr, data = gpkimg.read(f) 103 except IOError, x: 104 raise CannotReadDataFile(x.args[0]) 105 106 hdr['_NAME'] = f 107 return gpk_img(hdr, data)
108 109 110 CacheSize = 10*1024*1024 111 _csize = 0 112 _cache = {}
113 -def CachedRead(f):
114 """Reads in a data file (f is the name) and returns a 115 class gpk_img object. File names may end in .Z or .gz 116 if the data is compressed. 117 Raises CannotReadDataFile if it does not recognize the file 118 format. 119 """ 120 global _csize, _cache, CacheSize 121 try: 122 return _cache[f].copy() 123 except KeyError: 124 pass 125 while _csize >= CacheSize and len(_cache): 126 x = _cache.pop() 127 _csize -= x.n[1]*x.n[2] 128 if len(_cache) == 0 or _csize < 0: 129 _csize = 0 130 tmp = read(f) 131 _cache[f] = tmp 132 _csize += tmp.n[1]*tmp.n[2] 133 return tmp.copy()
134 135 136
137 -class gpk_img:
138 """This class describes a data file. It contains header 139 information and data. The header information follows 140 the NASA FITS standard closely with a few extensions. 141 142 Header items: 143 _NAME -- the filename from which the data came, 144 _FILETYPE -- a string indicating the format of the data file 145 146 Each instance has two attributes that are intended for general use: 147 hdr -- a dictionary containing all the header information, and 148 d -- a 2-dimensional numpy array containing the data. 149 """ 150
151 - def __init__(self, hdr, data=None):
152 """Creates an gpk_img object from 153 hdr (a dictionary that describes a data array) 154 and the data (a Numeric 2-dimensional array). 155 """ 156 157 self.hdr = hdr.copy() 158 try: 159 self.d = Num.asarray(data, Num.Float) 160 except ValueError,x: 161 raise ValueError, '%s (you gave it %s)' % (str(x), str(type(data))) 162 163 if len(self.d.shape) == 1: 164 # self.d = Num.reshape(self.d, (self.d.shape[0], 1)) 165 self.d = Num.transpose([self.d]) 166 elif len(self.d.shape) == 2: 167 if(self.hdr.has_key('NAXIS')): 168 assert self.hdr['NAXIS'] in ['2', 2] 169 del self.hdr['NAXIS'] 170 else: 171 raise ValueError, 'Needs 1 or 2 dimensional arrays' 172 173 # self.n = (None, string.atoi(d.get('NAXIS1', '1')), 174 # string.atoi(d.get('NAXIS2', '1'))) 175 self.n = (None, self.d.shape[1], self.d.shape[0]) 176 # _del(self.hdr, 'NAXIS1') 177 # _del(self.hdr, 'NAXIS2') 178 self.cdelt = (None, float(hdr.get('CDELT1', 'NaN')), 179 float(hdr.get('CDELT2', 'NaN'))) 180 # _del(self.hdr, 'CDELT1') 181 # _del(self.hdr, 'CDELT2') 182 self.crpix = (None, float(hdr.get('CRPIX1', 'NaN')), 183 float(hdr.get('CRPIX2', 'NaN'))) 184 # _del(self.hdr, 'CRPIX1') 185 # _del(self.hdr, 'CRPIX2') 186 self.crval = (None, float(hdr.get('CRVAL1', 'NaN')), 187 float(hdr.get('CRVAL2', 'NaN'))) 188 # _del(self.hdr, 'CRVAL1') 189 # _del(self.hdr, 'CRVAL2') 190 self.types = _pull_types(self.hdr, self.n[1])
191 192
193 - def copy(self):
194 return gpk_img(self.hdr, Num.array(self.d, Num.Float, copy=True))
195
196 - def add_history(self, h):
197 """Adds an item to the file's history.""" 198 if 'HISTORY' in self.hdr: 199 self.hdr['HISTORY'] += '\n%s' % h.strip() 200 else: 201 self.hdr['HISTORY'] = h.strip()
202
203 - def get_history(self):
204 """Returns the file's history as a list of strings.""" 205 return self.hdr.get('HISTORY', '').split('\n')
206
207 - def empty(self):
208 """Returns True if there is no data.""" 209 return self.d.shape[0]<=0 or self.d.shape[1]<=0 or self.n[1]<=0 or self.n[2]<=0
210
211 - def index(self, t, i):
212 """What is the i^th index if the i^th coordinate = t? 213 More plainly, this function gives you the index that 214 corresponds to the coordinate, and 'i' selects which index. 215 (i=1 or 2). 216 """ 217 assert self.n[i]>0 218 assert self.cdelt[i] != 0 219 tmp = (self.crpix[i]-1) + (t-self.crval[i])/self.cdelt[i] 220 if hasattr(t, 'shape'): 221 if Num.sometrue(Num.less(tmp, 0.0)) and Num.sometrue(Num.greater(tmp,self.n[i]-1)): 222 raise IndexError, "coordinate out of range" 223 elif tmp<0 or tmp>=self.n[i]-1: 224 raise IndexError, "coordinate out of range: %g vs. (%g, %g)" % (t, self.coord(0, i), self.coord(self.n[i]-1,i)) 225 return tmp
226
227 - def coord(self, index, i):
228 """What is the i^th coordinate if the i^th index==index? 229 More plainly, this function gives you the coordinate that 230 corresponds to the index, and 'i' selects which index. 231 If index=None, it returns an array of all the coordinates. 232 (i=1 or 2). 233 """ 234 assert self.n[i]>0 235 if index is None: 236 index = Num.arrayrange(self.n[i]) 237 return self.crval[i] + (index-(self.crpix[i]-1))*self.cdelt[i]
238
239 - def x_index(self, x):
240 """Returns the y-index for a particular x-coordinate.""" 241 return self.index(x, 1)
242
243 - def y_index(self, y):
244 """Returns the y-index for a particular y-coordinate. 245 Also used for t-index, which returns the index 246 for a particular time, if the data is a time-series. 247 """ 248 return self.index(y, 2)
249
250 - def x(self, index = None):
251 """Returns the x-coordinate for a particular x-index.""" 252 return self.coord(index, 1)
253
254 - def y(self, index = None):
255 """Returns the y-coordinate for a particular y-index.""" 256 return self.coord(index, 2)
257 258 259
260 - def write(self, f):
261 """Write out a data file, with the information in this class.""" 262 d = self.hdr.copy() 263 sh = self.d.shape 264 265 if len(sh) == 2: 266 data = self.d 267 else: 268 raise RuntimeError, '2 dimensional arrays only' 269 270 nc = self.d.shape[1] 271 for i in (1,2): 272 si = str(i) 273 d['NAXIS' + si] = str(sh[i-1]) 274 d['CDELT' + si] = str(self.cdelt[i]) 275 d['CRPIX' + si] = str(self.crpix[i]) 276 d['CRVAL' + si] = str(self.crval[i]) 277 for k in d.keys(): 278 if k.startswith('TTYPE'): 279 cs = k[len('TTYPE'):] 280 col = int(cs) 281 if col<=0 or col>nc: 282 self._wipe(col) 283 avpairs = [ (x[0], str(x[1])) for x in d.items() ] 284 # print "# avpairs=", avpairs 285 gpkimg.write(f, avpairs, data)
286 287
288 - def __len__(self):
289 """This is here to make it possible to access the class 290 as a (header,data) tuple. 291 """ 292 return 2 # header, data
293 294
295 - def __getitem__(self, key):
296 """This is here to make it possible to access the class 297 as a (header,data) tuple. 298 """ 299 return (self.hdr, self.d)[key]
300 301 302 # Below is time-series stuff. Should be a seperate class. 303
304 - def time(self, index = None):
305 """What time corresponds to a certain index? 306 (This is the same as self.y(index).)""" 307 return self.coord(index, 2)
308 309 t_index = y_index 310 311
312 - def start(self):
313 """Starting time of the data.""" 314 return self.time(0)
315
316 - def end(self):
317 """Ending time of the data.""" 318 return self.time(self.n[2]-1)
319 320
321 - def dt(self):
322 """Assuming time flows down a column (axis=2), what's the time interval?""" 323 return self.cdelt[2]
324 325
326 - def column(self, s=None):
327 """Return the requested column of data. 328 (This is the actual data, not a copy.) 329 If s is a string, it returns the column which has TTYPEx=s. 330 Otherwise, if s is an integer, it returns that column 331 (zero-based index). 332 As a special case, if there is only one column, 333 and the function is called with s=None (the default), 334 it will return the data. 335 """ 336 337 if s is None and self.d.shape[1]==1: 338 return self.d[:,0] 339 340 try: 341 ic = self.types[s] 342 except KeyError: 343 if '_NAME' in self.hdr: 344 xargs = 'file=%s; column=%s' % ( 345 self.hdr['_NAME'], str(s)) 346 else: 347 xargs = 'column=%s' % str(s) 348 raise NoSuchColumn, xargs 349 return self.d[:,ic]
350 351
352 - def _idxhdrset(self, prefix, index, value):
353 a = '%s%d' % (prefix, index+1) 354 if value is not None: 355 self.hdr[a] = value 356 else: 357 _del(self.hdr, a)
358 359
360 - def _wipe(self, col):
361 nd = 0 362 for x in ('TTYPE', 'TUNIT', 'TRANGE'): 363 nd += _del(self.hdr, '%s%d' % (x, col+1)) 364 return nd
365 366
367 - def register(self, ttype, column, trange=None, tunit=None):
368 """This sets up the TTYPE fields so that 369 the specified datatype lives in the specified 370 column. Column is zero-based. 371 You can erase by specifying ttype=None. 372 """ 373 # print 'REGISTER', ttype, column, trange, tunit 374 assert isinstance(ttype, types.StringType) 375 assert isinstance(column, types.IntType) 376 377 # First, you find out the old type that was stored 378 # in the column. That needs to be deleted from self.types. 379 if self.hdr.has_key('TTYPE%d' % (column+1)): 380 oldtype = self.hdr['TTYPE%d' % (column+1)] 381 _del(self.types, oldtype) 382 # Then, you wipe out the information in the header: 383 self._wipe(column) 384 385 # Finally, you set things: 386 if ttype is not None: 387 # print 'setting types', ttype, column 388 self.types[ttype] = column 389 390 # print 'SETTING HDR', column, ttype 391 self._idxhdrset('TTYPE', column, ttype) 392 self._idxhdrset('TUNIT', column, tunit) 393 self._idxhdrset('TRANGE', column, trange) 394 self.types[column] = column
395 396
397 - def attach_col(self, data, ttype, trange=None, tunit=None, extra_hdr={}):
398 """Attach another column to an existing class. 399 This modifies the object it is called upon. 400 The data may or may not be copied in the process. 401 """ 402 assert len(data.shape) == 1 403 assert ttype is not None 404 assert self.d is None or data.shape[0] == self.d.shape[0] 405 self.hdr.update(extra_hdr) 406 if self.d is None: 407 self.d = Num.array([data]) 408 c = 0 409 elif ttype in self.types: 410 c = self.types[ttype] 411 self.d[:,c] = Num.asarray(data, Num.Float) 412 else: 413 self.d = Num.concatenate([self.d, Num.transpose([data])], axis=1) 414 c = self.d.shape[1]-1 415 self.register(ttype, c, trange=trange, tunit=tunit) 416 self.n = (None, self.d.shape[1], self.d.shape[0])
417 418
419 - def select_cols(self, columns, extra_hdr = {}):
420 """Select a set of columns in an existing class, 421 returning a new gpk_img class containing a subset of 422 the data. The data is copied; the new and old 423 objects share nothing. 424 """ 425 newdata = [] 426 for c in columns: 427 newdata.append( self.column(c) ) 428 n = gpk_img(self.hdr, Num.transpose(newdata)) 429 n.hdr.update( extra_hdr ) 430 nic = 0 431 for c in columns: 432 ic = self.types[c]+1 433 # print 'Registering %s (old col %d) into new col %d' % (c, ic, nic) 434 n.register(self.hdr.get('TTYPE%d' % ic, None), nic, 435 self.hdr.get('TRANGE%d' % ic, None), 436 self.hdr.get('TUNIT%d' % ic, None)) 437 nic += 1 438 439 while n._wipe(nic): # Clear out misleading TTYPE info. 440 nic += 1 441 442 return n
443