Package gmisclib :: Module chunkio
[frames | no frames]

Source Code for Module gmisclib.chunkio

  1  """These are I/O routines to allow you to write stuff 
  2  like arrays and dictionaries (and arrays of dictionaries) 
  3  to a human-readable file. 
  4   
  5  The format is normally STARTMARKER LENGTH_INFO DATA ENDMARKER, 
  6  where STARTMARKER is something like C{"a{"}, 
  7  that specifies you're at the beginning of an array. 
  8  LENGTH_INFO can depend on the data type, but it's normally 
  9  an integer.   Then comes the data, and finally as a check, 
 10  you encounter the ENDMARKER, which is normally C{"}"}. 
 11  All of this is recursive, of course. 
 12  """ 
 13   
 14  import string 
 15  import collections 
 16  import numpy 
 17   
 18  import g_encode 
 19   
 20  _e = g_encode.encoder(allowed=string.letters + string.digits + r"""<>?,./:";'{}[\]!@$^&*()_+=\|\\-""") 
 21  __version__ = "$Revision: 1.24 $" 
 22   
def test_e():
    """Self-test for the module-level encoder C{_e}.

    Checks that a blank is encoded as C{%S} and that an encoded chunk
    never begins with C{#}.
    """
    expectations = (
        (' ', '%S'),
        ('hello there', 'hello%Sthere'),
    )
    for raw, encoded in expectations:
        assert _e.fwd(raw) == encoded
    assert not _e.fwd('#x').startswith('#')
27 28
class BadFileFormat(RuntimeError):
    """Raised by the readers when the chunk stream has an unexpected layout.

    @param s: message describing what was wrong with the input.
    """

    def __init__(self, s):
        super(BadFileFormat, self).__init__(s)
32
class chunk:
    """Low level file I/O operations.

    This class represents a sequence of white-space separated chunks
    of data.  Subclasses supply L{more} and L{readchunk}; every other
    method here is built on top of C{readchunk()}.
    """

    # Start markers.  The element count may be glued to the marker
    # (C{a{3}) or follow it as a separate chunk (C{a{ 3}).
    _array_prefix = 'a{'
    _lap = len(_array_prefix)
    _dict_prefix = 'd{'
    _ldp = len(_dict_prefix)

    def __init__(self):
        """Constructor."""
        pass

    def more(self):
        """Returns zero if the data source is empty.
        Returns nonzero if there is more data.
        """
        raise RuntimeError('Virtual Function')

    def readchunk(self):
        """Read in the next white-space delimited chunk of text."""
        raise RuntimeError('Virtual Function')

    def read_float(self):
        """Read a float written as the two chunks C{f:} and the value.

        @raise BadFileFormat: if the first chunk is not C{f:}.
        """
        tmp = self.readchunk()
        if tmp != 'f:':
            raise BadFileFormat('read_float: bad prefix')
        return float(self.readchunk())

    def read_array(self, fcn):
        """Read an array of data values.  Raw text is converted to
        finished array values by the specified fcn.

        @return: a list, or None (see L{read_array_of}).
        """
        return self.read_array_of(lambda s: fcn(s.readchunk()))

    def read_tuple(self, fcn):
        """Read a tuple of data values.  Raw text is converted to
        finished array values by the specified fcn."""
        return tuple(self.read_array_of(lambda s: fcn(s.readchunk())))

    def read_array_of(self, fcn):
        """Read an array of data values.  Values are read
        in by the specified fcn, which is called with this reader
        as its only argument, once per element.

        @return: list of values, or None if C{readchunk()} returned None
            where the start marker was expected.
        @raise BadFileFormat: on a wrong start or end marker.
        """
        tmp = self.readchunk()
        if tmp is None:
            return None
        if not tmp.startswith(self._array_prefix):
            raise BadFileFormat('Array initial')
        if len(tmp) > self._lap:
            n = int(tmp[self._lap:])
        else:
            n = int(self.readchunk())
        o = [fcn(self) for i in range(n)]
        if self.readchunk() != '}':
            raise BadFileFormat('Array final')
        return o

    def read_dict(self, fcn):
        """Read a dictionary of data values.  Raw text is converted to
        finished values by the specified fcn.  Keys are strings."""
        return self.read_dict_of(lambda s: fcn(s.readchunk()))

    def read_dict_of(self, fcn):
        """Read a dictionary of data values.
        Values are read in by the specified fcn.
        This allows dictionaries of X, where X can be a complex datatype
        like an array.  Keys are strings.

        @return: dict, or None if C{readchunk()} returned None where the
            start marker was expected.
        @raise BadFileFormat: on a wrong start or end marker.
        """
        tmp = self.readchunk()
        if tmp is None:
            return None
        # Use the class constant so the test and the length used to
        # slice off the count can never disagree.
        if not tmp.startswith(self._dict_prefix):
            raise BadFileFormat('Dict initial')
        if len(tmp) > self._ldp:
            n = int(tmp[self._ldp:])
        else:
            n = int(self.readchunk())

        o = {}
        for i in range(n):
            k = self.readchunk()
            o[k] = fcn(self)
        if self.readchunk() != '}':
            raise BadFileFormat('Dict final')
        return o

    def groupstart(self):
        """Consume a C{g{} marker and return the chunk that follows it.

        @raise BadFileFormat: if the next chunk is missing or is not
            a group start marker.
        """
        tmp = self.readchunk()
        # readchunk() may return None; report that as a format error
        # instead of failing on None.startswith.
        if tmp is None or not tmp.startswith('g{'):
            raise BadFileFormat('Group initial: "%s"' % tmp)
        return self.readchunk()

    def groupend(self):
        """Consume the C{}} that closes a group.

        @raise BadFileFormat: if the next chunk is not C{}}.
        """
        if self.readchunk() != '}':
            raise BadFileFormat('Group final')

    def read_NumArray(self):
        """Read a numpy array written as C{N{ <shape array> v v ... }}.

        Values are stored in row-major order, so the array is filled
        flat and reshaped at the end.

        @return: numpy array of floats with the stored shape.
        @raise BadFileFormat: on a bad marker, a missing shape, a
            dimension outside 1..99999, or a value that is missing or
            is not a float.
        """
        tmp = self.readchunk()
        if tmp != 'N{':
            raise BadFileFormat('NumArray initial: "%s"' % tmp)
        dims = self.read_array(int)
        if dims is None:
            raise BadFileFormat('NumArray shape missing')
        sz = tuple(dims)
        n = 1
        for s in sz:
            # Explicit check rather than assert: this validates file
            # contents and must survive python -O.
            if not (s > 0 and s < 100000):
                raise BadFileFormat('NumArray bad dimension: %d' % s)
            n *= s
        d = numpy.zeros((n,))
        for i in range(n):
            try:
                d[i] = float(self.readchunk())
            except (TypeError, ValueError) as x:
                # ValueError: chunk is not a number.  TypeError:
                # readchunk() returned None (input ended early).
                # The wording of these messages varies between Python
                # versions, so do not match on the text.
                raise BadFileFormat('Expected floats, got something else: %s' % str(x))
        d = numpy.reshape(d, sz)
        assert d.shape == sz
        if self.readchunk() != '}':
            raise BadFileFormat('NumArray final')
        return d
172 173
class datachunk(chunk):
    """Low level file I/O operations.

    This class represents a file as a sequence of white-space
    separated chunks of data.  Lines beginning with C{#} and lines
    holding only white space are skipped.
    """

    def __init__(self, fd):
        """Constructor.

        @param fd: object with a C{readline()} method that returns an
            empty string at end of input.
        """
        chunk.__init__(self)
        # A threaded reader (gpkmisc.threaded_readable_file) was tried
        # here at some point; the descriptor is now used directly.
        self.readiter = fd
        # Chunks of the current line that have not been handed out yet.
        self.next = collections.deque(self._get_next())

    def _get_next(self):
        """Return the chunks of the next line that holds data.

        @return: list of raw (still encoded) chunks; an empty list at
            end of input.
        """
        while True:
            line = self.readiter.readline()
            if line == '':
                # readline() gives '' only at end of input.
                return []
            if line.startswith('#'):
                continue
            pieces = line.split()
            if pieces:
                return pieces
            # Otherwise the line was blank: keep looking.

    def more(self):
        """Returns False if the data source is empty.
        @return: True if there is more data.
        """
        if not self.next:
            self.next.extend(self._get_next())
        return len(self.next) > 0

    def readchunk(self):
        """Read in the next white-space delimited chunk of text.

        @return: the decoded chunk, or None at end of input.
        """
        try:
            return _e.back(self.next.popleft())
        except IndexError:
            # Nothing buffered: pull in the next data line and retry.
            self.next.extend(self._get_next())
            if not self.next:
                return None
            return _e.back(self.next.popleft())
220 221
class stringchunk(chunk):
    """Low level operations: splitting a string into chunks.

    This class represents a string as a sequence
    of white-space separated chunks of data.
    """

    def __init__(self, s):
        """Constructor.

        @param s: the text to split into chunks.
        """
        chunk.__init__(self)
        self.cache = s.split()
        self.i = 0      # index of the next chunk to hand out

    def more(self):
        """@return: False if the data source is empty; True if there is more data.
        """
        # self.i indexes the next unread chunk, so data remains while
        # it is a valid index.  (Comparing against len-1 wrongly
        # reported "empty" with one chunk still unread.)
        return self.i < len(self.cache)

    def readchunk(self):
        """Read in the next white-space delimited chunk of text.

        @return: the decoded chunk, or None when all chunks are used up.
        """
        try:
            tmp = _e.back(self.cache[self.i])
            self.i += 1
            return tmp
        except IndexError:
            return None
249 250 251
class chunk_w:
    """Base class for chunk writers.

    Subclasses must provide L{stringwrite}; L{nl}, L{comment} and
    L{close} are also virtual.  The C{write_*} methods and the group
    methods return C{self} so that calls can be chained.
    """

    def __init__(self):
        pass

    def _indent(self):
        """Hook called when a nested structure is opened; no-op here."""
        pass

    def _dedent(self):
        """Hook called when a nested structure is closed; no-op here."""
        pass

    def writechunk(self, ch, b=0):
        """Write one chunk via L{stringwrite} and return C{self}."""
        self.stringwrite(ch, b)
        return self

    def write(self, ch, b=0):
        raise RuntimeError("Virtual function")

    def nl(self):
        raise RuntimeError("Virtual function")

    def comment(self, comment):
        raise RuntimeError("Virtual function")

    def close(self):
        raise RuntimeError("Virtual function")

    def write_None(self):
        """Write the single chunk C{N}."""
        self.stringwrite("N")
        return self

    def write_array_of(self, data, writer, b=0):
        """Write C{a{<len>}, each item, then C{}}.

        @param data: sized iterable of items.
        @param writer: called as C{writer(self, item)} for every item.
        @param b: passed to L{stringwrite} for the start and end markers.
        """
        opener = "a{%d" % len(data)
        self.stringwrite(opener, b)
        self._indent()
        for item in data:
            writer(self, item)
        self._dedent()
        self.stringwrite("}", b)
        return self

    def write_array(self, data, b=0, converter=str):
        """Write an array whose items are each turned into one chunk
        by C{converter}."""
        def emit(out, item, cvt=converter):
            return out.stringwrite(cvt(item))

        return self.write_array_of(data, emit, b)

    def write_dict_of(self, data, writer, b=1):
        """Write C{d{<len>}, then key and value for each entry, then C{}}.

        @param data: mapping; keys are written directly with L{stringwrite}.
        @param writer: called as C{writer(self, value)} for every value.
        @param b: passed to L{stringwrite} for the start and end markers.
        """
        opener = "d{%d" % len(data)
        self.stringwrite(opener, b)
        self._indent()
        for (key, value) in data.items():
            self.stringwrite(key)
            writer(self, value)
        self._dedent()
        self.stringwrite("}", b)
        return self

    def write_dict(self, data, b=0, converter=str):
        """Write a dictionary whose values are each turned into one
        chunk by C{converter}."""
        def emit(out, value, cvt=converter):
            return out.stringwrite(cvt(value))

        return self.write_dict_of(data, emit, b)

    def groupstart(self, contents, b=1, comment=None):
        """The 'contents' argument is conventionally a string
        containing 'a' for an internal array, 'g' for a group,
        'd' for a dictionary...   It describes the contents of the group.
        This is just used by the application to check what is in the
        group, so it could contain any chunk.

        If C{comment} is true it is passed to L{comment} after the
        contents chunk.
        """
        self.stringwrite("g{", b)
        self._indent()
        self.stringwrite(contents)
        if comment:
            self.comment(comment)
        return self

    def groupend(self, b=1):
        """Write the C{}} that closes a group, then dedent."""
        self.stringwrite("}", b)
        self._dedent()
        return self

    def write_NumArray(self, d, b=0, converter=str):
        """Write C{N{}, the shape as an array, every element, then C{}}.

        A two-dimensional array is written row by row, with L{nl}
        called after each row when there is more than one row; any
        other array is written in C{numpy.ravel} order.
        """
        self.stringwrite("N{", b)
        self.write_array(d.shape, b=0)
        if len(d.shape) == 2:
            nrows, ncols = d.shape
            break_rows = nrows > 1
            for i in range(nrows):
                for j in range(ncols):
                    self.stringwrite(converter(d[i, j]))
                if break_rows:
                    self.nl()
        else:
            for value in numpy.ravel(d):
                self.stringwrite(converter(value))
        self.stringwrite("}", 0)
        return self

    def write_float(self, d, b=0):
        """Write the chunk C{f:} followed by C{str(d)}."""
        self.stringwrite("f:", b)
        self.stringwrite(str(d), b=0)
        return self

    def stringwrite(self, ch, b=0):
        raise RuntimeError('Virtual Function')
367 368 369 370
class datachunk_w(chunk_w):
    """This writes stuff to a file."""

    def __init__(self, fd, width=80):
        """
        @param fd: object with C{write()} and C{flush()} methods.
        @param width: line length beyond which a new line is started.
        """
        chunk_w.__init__(self)
        self.fd = fd
        self.w = width
        self.i = 0              # characters written on the current line
        self.indentlevel = 0
        self.dentstring = ' '   # written once per indent level

    def _indent(self):
        self.indentlevel += 1

    def _dedent(self):
        assert self.indentlevel > 0
        self.indentlevel -= 1

    def stringwrite(self, ch, b=0):
        """ch is the chunk of text.  b=1 to begin a new line.

        A new line is also started when the chunk would run past the
        configured width.  At the start of a line C{b} has no effect
        and the current indentation is written first.
        """
        assert isinstance(ch, str), "Non-string to stringwrite: <%s>" % str(ch)
        if self.i > 0:
            if b or len(ch) + self.i > self.w:
                self.nl()
            else:
                # Separate from the previous chunk on this line.
                self.fd.write(' ')
                self.i += 1
        if self.i == 0:
            self.fd.write(self.dentstring * self.indentlevel)
        tmp = _e.fwd(ch)
        self.fd.write(tmp)
        self.i += len(tmp)

    def nl(self):
        """End the current line."""
        self.fd.write('\n')
        self.i = 0

    def comment(self, comment):
        """Write C{comment} on its own line(s), each starting with C{#}.

        Every line of a multi-line comment gets its own C{#} prefix;
        an un-prefixed continuation line would be read back as data.
        """
        if self.i > 0:
            self.nl()
        idl = max(0, self.indentlevel - 1)
        prefix = '#' + self.dentstring * idl
        for line in comment.split('\n'):
            self.fd.write(prefix + line + '\n')

    def close(self):
        """Finish the current line, flush, and drop the file reference.

        Calling close() again is a no-op.
        """
        if self.fd is None:
            return
        if self.i > 0:
            self.nl()
        self.fd.flush()
        # os.fsync(self.fd.fileno())
        self.fd = None  # This will normally close the fd,
                        # unless it is held open by some other
                        # use.

    def __del__(self):
        # getattr: __del__ can run on an object whose __init__ did not
        # get as far as setting self.fd.
        if getattr(self, 'fd', None) is not None:
            self.close()
436 437 438 439
class chunkstring_w(chunk_w):
    """This accumulates stuff in memory, and returns a string
    when close() is called.

    Line breaks and comments are dropped; the C{b} argument of
    L{stringwrite} is ignored.
    """

    def __init__(self):
        chunk_w.__init__(self)
        self.buf = []   # encoded chunks, in the order written

    def stringwrite(self, ch, b=0):
        """Encode C{ch} and remember it."""
        encoded = _e.fwd(ch)
        self.buf.append(encoded)

    def nl(self):
        """No-op for the in-memory writer."""
        pass

    def comment(self, comment):
        """No-op: the comment is discarded."""
        pass

    def close(self):
        """Return the accumulated chunks joined by single blanks and
        empty the buffer."""
        joined = ' '.join(self.buf)
        self.buf = []
        return joined
465 466 467
def test():
    """Round-trip self-test using the in-memory writer and reader.

    Writes one string chunk and one 3x2 array with chunkstring_w and
    checks that stringchunk reads back the same values.
    """
    text = chunkstring_w().writechunk('Hoo!#\n').close()
    assert stringchunk(text).readchunk() == 'Hoo!#\n'
    # The builtin float is used as dtype: the numpy.float alias meant
    # the same thing and is no longer present in current NumPy.
    tmp = numpy.array([[1.0, -1.0], [1.6, 0.0], [2.0, 3.0]], float)
    tmp1 = chunkstring_w().write_NumArray(tmp).close()
    retmp = stringchunk(tmp1).read_NumArray()
    assert retmp.shape == tmp.shape
    diff = retmp - tmp
    assert numpy.absolute(numpy.ravel(diff)).sum() < 1e-6
if __name__ == '__main__':
    # Run the module's self-tests when executed as a script.
    test_e()
    test()