Package gmisclib :: Module gpkmisc
[frames] | no frames]

Source Code for Module gmisclib.gpkmisc

  1  from __future__ import with_statement 
  2  import os 
  3  import sys 
  4  import math 
  5  import time 
  6  import stat 
  7  import errno 
  8   
  9  import die 
 10   
 11  try: 
 12          from Numeric_gpk import N_maximum, N_minimum, N_median, N_mean_ad,      \ 
 13                          variance, stdev,        \ 
 14                          set_diag,       \ 
 15                          make_diag, limit, vec_variance, qform,  \ 
 16                          KolmogorovSmirnov, interp, interpN 
 17  except ImportError, _x: 
 18          if str(_x).startswith('cannot import name'): 
 19                  raise 
 20          pass 
 21   
 22   
23 -def median(x):
24 """ 25 @raise ValueError: if the input list is zero length. 26 """ 27 xx = sorted(x) 28 n = len(xx) 29 if not (n > 0): 30 raise ValueError, "No data to median." 31 return 0.5*(xx[n//2] + xx[(n-1)//2])
32 33
34 -def median_across(xl):
35 """ 36 @note: There is a version of this in Numeric_gpk that is more efficient when the input 37 is a list of numpy.ndarray vectors. 38 @raise ValueError: If the input vectors are different lengths. 39 @raise ValueError: see L{median}. 40 """ 41 rv = [] 42 tl = None 43 for (i, tup) in enumerate(zip(*xl)): 44 if tl is None: 45 tl = len(tup) 46 elif tl != len(tup): 47 raise ValueError, "Vector %d has a different length(%d) from the rest(%d)" % (i, len(tup), tl) 48 rv.append(median(tup)) 49 return rv
50 51
52 -def avg(x):
53 s = 0.0 54 n = 0 55 for t in x: 56 s += t 57 n += 1 58 return s/float(n)
59 60
61 -def median_ad(x):
62 """Median absolute deviation""" 63 medn = median(x) 64 return median( [ abs(t-medn) for t in x ] )
65 mad = median_ad 66 67
68 -def mean(x):
69 sum = 0.0 70 n = 0 71 for t in x: 72 sum += t 73 n += 1 74 return float(sum)/n
75 76
77 -def mean_stdev(x):
78 """ 79 @param x: A list of data to average. 80 @type x: list(float), typically. 81 @return: the mean and standard deviation of the input list. 82 If there is only one datum, the standard deviation is 83 reported as None. 84 @rtype: (float, float) or (float, None) 85 @raise ValueError: If there is no data. 86 """ 87 sum = 0.0 88 n = len(x) 89 if not ( n>1 ): 90 raise ValueError, "Not enough data: n=0" 91 for t in x: 92 sum += t 93 mean = float(sum)/n 94 ss = 0.0 95 for t in x: 96 tmp = t - mean 97 ss += tmp*tmp 98 return (mean, math.sqrt(ss/(n-1)) if n>1 else None)
99 100 101
102 -def mean_ad(x):
103 medn = median(x) 104 sum = 0.0 105 for t in x: 106 sum += abs(t-medn) 107 return (medn, sum/float(len(x)))
108 109
110 -def geo_mean(*d):
111 """ 112 @raise ValueError: if any argument is negative. 113 @return: Geometric mean of its arguments. 114 @rtype: float 115 """ 116 s = 0.0 117 for t in d: 118 if t == 0.0: 119 return 0.0 120 elif t > 0.0: 121 s += math.log(t) 122 else: 123 raise ValueError, "Negative/NaN argument to geo_mean: %s" % str(t) 124 return math.exp(s/len(d))
125 126 127
128 -def entropy(x):
129 """Compute the entropy of a list of probabilities.""" 130 e = 0.0 131 ps = 0.0 132 for p in x: 133 assert p <= 1.0 134 if p > 0.0: 135 e -= p*math.log(p) 136 ps += p 137 assert 0.999 < ps < 1.001, "Probabilities must sum to one." 138 return e
139 140
141 -def resample(d):
142 """Bootstrap resampling. Call this many times: each one 143 returns a random resampling. 144 """ 145 import random 146 o = [] 147 for i in range(len(d)): 148 o.append( random.choice(d) ) 149 return o
150 151
152 -def jackknife(d):
153 """Jackknife resampling. Call this once. 154 It returns a list of deleted lists.""" 155 for drop in range(len(d)): 156 yield [ di for (i, di) in enumerate(d) if i!=drop ]
157 158 159 160 161 162
163 -def Student_t_dens(x, n):
164 """From p.337 Statistical Theory by Bernard Lindgren.""" 165 166 from transcendental import gamma 167 # http://bonsai.ims.u-tokyo.ac.jp/~mdehoon/software/python/statistics.html 168 p = gamma((n+1)/2.0) * (1+x*x/n)**(-(n+1)/2.0) / ( 169 math.sqrt(n*math.pi)*gamma(n/2.0) 170 ) 171 return p
172 173
174 -def log_Student_t_dens(x, n):
175 """From p.337 Statistical Theory by Bernard Lindgren.""" 176 assert n > 0 177 from gmisclib import stats 178 # http://bonsai.ims.u-tokyo.ac.jp/~mdehoon/software/python/statistics.html 179 # p = gamma((n+1)/2.0) * (1+x*x/n)**(-(n+1)/2.0) / ( 180 # math.sqrt(n*math.pi)*gamma(n/2.0) 181 # ) 182 lp = math.log(1+x*x/n)*(-(n+1)/2.0) 183 lp += stats.gammaln((n+1)/2.0) - stats.gammaln(n/2.0) 184 lp -= 0.5*math.log(n*math.pi) 185 return lp
186 187 188 189 190 _fcache = {}
191 -def log_factorial(n):
192 assert n >= 0 193 try: 194 lf = _fcache[n] 195 except KeyError: 196 lf = 0.0 197 for i in range(2,n): 198 lf += math.log(i) 199 _fcache[n] = lf 200 return lf
201 202
203 -def log_Combinations(n, m):
204 assert n >= m 205 assert m >= 0 206 return log_factorial(n) - log_factorial(m) - log_factorial(n-m)
207
208 -def ComplexMedian(P):
209 """P is a list of complex numbers. 210 This algorithm works by repeatedly stripping off the convex 211 hull of the points. 212 """ 213 import convex_hull2d 214 import cmath 215 import dictops 216 HUGE = 1e30 217 EPS = 1e-7 218 Q = dictops.dict_of_accums() 219 for p in P: 220 Q.add(p, 1.0) 221 222 while len(Q) > 3: 223 # print 'Q=', Q 224 edge = convex_hull2d.convexHull(Q.keys()) 225 # print 'edge=', edge 226 ee = (edge[-1],) + edge + (edge[0],) 227 wt = {} 228 for i in range(1,len(edge)+1): 229 em = ee[i] - ee[i-1] 230 ep = ee[i+1] - ee[i] 231 # angle = cmath.log(em).imag - cmath.log(ep).imag 232 if min(abs(ep), abs(em)) < EPS*max(abs(em), abs(ep)): 233 angle = math.pi/2.0 #Kluge, mild. Roundoff errors. 234 else: 235 angle = cmath.log(em/ep).imag 236 # KLUGE AND ROUNDOFF WARNING! 237 if angle <= 0.0 and angle > -0.5: 238 angle = EPS #KLUGE! Awful! 239 if angle < 0.0: 240 angle += 2*math.pi 241 if angle >= math.pi and angle<math.pi+0.5: 242 angle = math.pi-EPS # KLUGE! Awful! 243 # print 'angle=', angle, ee[i-1], ee[i], ee[i+1], 'pi=', math.pi 244 assert angle>=0 and angle<=math.pi, "angle=%g" % angle 245 wt[ee[i]] = angle 246 fmin = HUGE 247 for p in edge: 248 f = Q[p]/wt[p] 249 if f < fmin: 250 fmin = f 251 # print 'fmin=', fmin 252 assert fmin > 0.0 253 sum = complex() 254 swt = 0.0 255 for p in edge: 256 fwp = fmin * wt[p] 257 # print 'Subtracting wt of ', fwp, 'from Q[', p, ']=', Q[p] 258 swt += fwp 259 sum += p * fwp 260 if Q[p] > fwp+EPS: 261 Q[p] -= fwp 262 # print '\tQ[', p, ']=', Q[p] 263 else: # Q[p]<=fwp 264 assert abs(Q[p]-fwp) < EPS 265 # print 'Deleting Q[', p, ']=', Q[p] 266 del Q[p] 267 if len(Q) == 0: 268 return sum/swt 269 # print 'Qfinal=', Q 270 sum = complex() 271 w = 0.0 272 for (p,n) in Q.items(): 273 sum += p*n 274 w += n 275 return sum/w
276 277 278
279 -def testCM():
280 def eq(a, b): 281 tmp = abs(a-b)/(abs(a)+abs(b)) < 1e-6 282 if not tmp: 283 print 'eq fails: %s vs %s' % (str(a), str(b)) 284 return tmp
285 print 'CM' 286 assert eq(ComplexMedian([complex(1,0), complex(2,0), complex(3,0)]), 2) 287 assert eq(ComplexMedian([complex(1), complex(2), complex(3), complex(4)]), 2.5) 288 assert eq(ComplexMedian([complex(1), complex(2), complex(3), complex(4), complex(5)]), 3) 289 assert eq(ComplexMedian([complex(1), complex(2), complex(3), complex(4), complex(4)]), 3) 290 assert eq(ComplexMedian([complex(1,1), complex(2,2), complex(3,3), complex(4,4)]), 291 complex(2.5,2.5)) 292 assert eq(ComplexMedian([complex(0,0), complex(1,0), complex(0,1), complex(1,1)]), 293 complex(0.5,0.5)) 294 assert eq( ComplexMedian([complex(0,0), complex(1,0), complex(0,1), 295 complex(1,1), complex(1,1)]), 296 complex(1,1)) 297 assert eq( ComplexMedian([complex(0,0), complex(1,0), complex(0,1), 298 complex(1,1), complex(0.6,0.6)]), 299 complex(0.6,0.6)) 300 assert eq(ComplexMedian([complex(0,0), complex(1,0), complex(0,1), complex(1,1), 301 complex(0,0), complex(2,0), complex(0,2), complex(2,2)]), 302 complex(0.5,0.5)) 303 assert eq(ComplexMedian([complex(0,0), complex(1,0), complex(0,1)]), 304 complex(1./3., 1./3.)) 305 assert eq(ComplexMedian([complex(0,0), complex(1,0), complex(0,1), 306 complex(0,0), complex(2,0), complex(0,2)]), 307 complex(0.3, 0.3)) 308 import cmath 309 for N in [3, 4, 5, 6, 7, 8, 13, 40, 100, 2351]: 310 print 'N=', N 311 assert eq(ComplexMedian( [ 1+cmath.exp(2*cmath.pi*1j*float(q)/N) 312 for q in range(N) ]), 313 complex(1.0, 0.0) 314 ) 315 316 317 318 319 320 import Queue 321 import threading
322 -class threaded_readable_file(object):
323 QSIZE = 100 324 _string = type('') 325
326 - def __init__(self, fd):
327 self.q = Queue.Queue(self.QSIZE) 328 329 def rhelper(fd, q): 330 try: 331 for l in fd: 332 q.put(l) 333 q.put(None) 334 # while True: 335 # l = fd.readline() 336 # if l == '': 337 # q.put(None) 338 # break 339 # else: 340 # q.put(l) 341 342 except (Exception, KeyboardInterrupt): 343 q.put( sys.exc_info() )
344 345 self.rh = threading.Thread(target=rhelper, args=(fd, self.q)) 346 self.rh.start()
347 348
349 - def readline(self):
350 if self.q is None: 351 return '' 352 x = self.q.get() 353 if type(x) != self._string: 354 self.rh.join() 355 self.q = None 356 if x is not None: 357 raise x[0], x[1], x[2] 358 return '' 359 return x
360 361
362 - def readlines(self):
363 o = [] 364 while self.q is not None: 365 x = self.q.get() 366 if type(x) != self._string: 367 self.rh.join() 368 self.q = None 369 if x is not None: 370 raise x[0], x[1], x[2] 371 break 372 o.append( x ) 373 return o
374 375
376 - def read_iter(self):
377 while self.q is not None: 378 x = self.q.get() 379 if type(x) != self._string: 380 self.rh.join() 381 self.q = None 382 if x is not None: 383 raise x[0], x[1], x[2] 384 break 385 yield x
386 387 __iter__ = read_iter 388 389
390 -def thr_iter_read(fd):
391 """Read the contents of a file as an iterator. 392 The read is two-threaded, so that one thread can be 393 waiting on disk I/O while the other thread is 394 processing the results. 395 """ 396 x = threaded_readable_file(fd) 397 return x.read_iter()
398 399 400
401 -def makedirs(fname, mode=0775):
402 """This makes the specified directory, including all 403 necessary directories above it. It is like os.makedirs(), 404 except that if the directory already exists 405 it does not raise an exception. 406 @param fname:Name of the directory to create. 407 @param mode: Linux file permissions for any directories it needs to create. 408 @type fname: L{str} 409 @type mode: L{int} 410 @note: If the directory already exists, it does not force it to 411 have the specified C{mode}. 412 @raise OSError: If it cannot create a part of the directory chain. 413 """ 414 415 c = fname.split('/') 416 for i in range(1+(c[0]==''), len(c)+1): 417 p = '/'.join(c[:i]) 418 try: 419 os.mkdir(p, mode) 420 except OSError, e: 421 if e.errno != errno.EEXIST: 422 raise
423 424 425
426 -def shuffle_Nrep(y, n=1, compare=None):
427 """Shuffle a list, y, 428 so that no item occurs more than n times in a row. 429 Equality is determined by the comparison function compare returning zero. 430 """ 431 import random 432 assert n>0, "Silly!" 433 x = list(y) 434 random.shuffle(x) 435 m = len(x) 436 if compare is None: 437 compare = lambda a, b: a==b 438 439 passes = 0 440 restart = 0 441 while passes<1000: 442 prb = None 443 pstart = 0 444 reps = 0 445 # This look searches for repetitions of a line 446 # in the shuffled output: 447 for i in range(max(1, restart), m): 448 if compare(x[i-1], x[i]): 449 reps += 1 450 pstart = i - 1 451 else: 452 pstart = None 453 reps = 0 454 if reps >= n: 455 # Too many repetitions. 456 prb = i 457 break 458 if prb is None: 459 # If we passed the test and there aren't too many repetitions. 460 y[:] = x 461 return y 462 463 k = prb+1 464 # Our problem is a block of identical lines. We need to 465 # find some lines that are not identical, first. 466 found = False 467 while k < m: 468 if not compare(x[pstart], x[k]): 469 found = True 470 break 471 k += 1 472 if not found: 473 # Whoops! The remainder of the file is stuffed with 474 # identical lines. We need to bring in some lines from 475 # the beginning. 476 tmp = x.pop(0) 477 x.append(tmp) 478 restart = 0 479 continue 480 # Not that we have a block containing at least some non-identical lines, 481 # we shuffle that block. 482 a = pstart 483 b = k + 1 484 # print 'a,b=', a, b 485 tmp = x[a:b] 486 random.shuffle(tmp) 487 # print 'tmp=', x[a:b], '->', tmp 488 x[a:b] = tmp 489 restart = a 490 passes += 1 491 # Go back to the beginning to check for repetitions. 492 raise RuntimeError, 'Too many passes: cannot avoid repetitions'
493 494
495 -def testSNR():
496 import random 497 for i in range(20): 498 x = [ i//10 for i in range(20) ] 499 shuffle_Nrep(x, 1) 500 for i in range(len(x)-1): 501 assert x[i] != x[i+1], 'Whoops: i=%d, %d %d' % (i, x[i], x[i+1]) 502 x = [ i//3 for i in range(10000) ] 503 random.shuffle(x) 504 shuffle_Nrep(x, 1) 505 for i in range(len(x)-1): 506 assert x[i] != x[i+1], 'Whoops: i=%d, %d %d' % (i, x[i], x[i+1])
507 508 509
510 -class dir_lock(object):
511 - def __init__(self, lockname):
512 self.maxtries = 10 513 self.name = lockname 514 self.pid = os.getpid() 515 self.sleep = 2.0
516
517 - def __enter__(self):
518 locktries = 0 519 d = os.path.dirname(self.name) 520 if not d: 521 d = '.' 522 assert os.path.isdir(d), "%s is not a directory" % d 523 assert os.access(d, os.W_OK|os.X_OK), "%s is not writeable" % d 524 errs = set() 525 while locktries < self.maxtries: 526 try: 527 os.mkdir(self.name) 528 except OSError, x: 529 errs.add(str(x)) 530 locktries += 1 531 time.sleep(self.sleep * (((self.pid+97*locktries)%197) / 197.0)**2) 532 else: 533 d = None 534 break 535 if d is not None: 536 die.warn("Could not acquire lock %s in %d tries (%s): continuing anyway." % (self.name, self.maxtries, ','.join(sorted(errs)))) 537 self.name = None 538 return self
539
540 - def __exit__(self, exc_type, exc_value, traceback):
541 if self.name: 542 os.rmdir(self.name)
543 544 545
546 -def open_nowipe(nm1, nm2, mode="w"):
547 """Open a file (typically for writing) but make sure that the 548 file doesn't already exist. The name is constructed from 549 nm1, a sequence number, and nm2. The sequence number gets incremented 550 until a name is found that doesn't exist. 551 This works by creating a directory as a lock file; it should be safe across 552 NFS. 553 @note: The directory containing C{nm1} must exist and be writeable. 554 @param nm1: the part of the name to the left of the sequence number 555 @param nm2: the part of the name to the right of the sequence number 556 Typically, nm2 is a suffix like ".wav". This may not contain a slash. 557 @param mode: The way to open the file -- passed to open(). 558 @type nm1: str 559 @type nm2: str 560 @type mode: str 561 @rtype C{file} 562 @return: The opened L{file} object. (Its name can be gotten from the C{name} attribute.) 563 """ 564 dname = os.path.dirname(nm1) 565 lockf = os.path.join(dname, ".gmisclib.gpkmisc.nowipe_lock") 566 with dir_lock(lockf): 567 i = 0 568 try: 569 while True: 570 nm = '%s%d%s' % (nm1, i, nm2) 571 open(nm, 'r') 572 i += 1 573 except IOError: 574 pass 575 return open(nm, mode)
576 577
578 -def dropfront(prefix, s):
579 """Drops the prefix from the string and raises an exception 580 if it is not there to be dropped. 581 """ 582 if not s.startswith(prefix): 583 raise ValueError, "String '%s' must start with '%s'" % (s[:20], prefix) 584 return s[len(prefix):]
585 586
587 -def open_compressed(fn):
588 import g_pipe 589 590 if fn.endswith('.bz2'): 591 ci, co = g_pipe.popen2("bzcat", ['bzcat', fn]) 592 ci.close() 593 return co 594 elif fn.endswith('.gz'): 595 ci, co = g_pipe.popen2("zcat", ['zcat', fn]) 596 ci.close() 597 return co 598 return open(fn, 'r')
599 600
601 -def gammaln(x):
602 raise RuntimeError, "Please import gammaln from gmisclib.stats"
603 604 _a_factor_cache = {1:1, 2:2, 3:3} # THIS CACHE SHOULD BE LOCKED
605 -def a_factor(n): # Don't mess with private_start.
606 """Finds the smallest prime factor of a number.""" 607 608 try: 609 return _a_factor_cache[n] 610 except KeyError: 611 pass 612 for p in primes(): 613 if p*p > n: 614 f = n 615 break 616 if n%p == 0: 617 f = p 618 break 619 _a_factor_cache[n] = f 620 return f 621 622 623 _primes = [2, 3] # 2, 3 *must* be there. # THIS CACHE SHOULD BE LOCKED
624 -def primes():
625 """This is a generator that produces an infinite list of primes. 626 """ 627 for p in _primes: 628 yield p 629 i = _primes[-1] + 2 630 while True: 631 for p in primes(): 632 if p*p > i: 633 if p > _primes[-1]: # multithreading... 634 _primes.append(i) 635 yield i 636 if i%p == 0: 637 i += 2 638 break
639 640 641 _factor_cache = {} # THIS CACHE SHOULD BE LOCKED
642 -def factor(n): # Don't mess with private_start.
643 """Factor a number into a list of prime factors, 644 in increasing order. 645 @param n: input number 646 @type n: int 647 @return: prime factors 648 @rtype: list 649 """ 650 651 try: 652 return _factor_cache[n] 653 except KeyError: 654 pass 655 f = a_factor(n) 656 if f == n: 657 tmp = [n] 658 else: 659 tmp = [f] + factor(n//f) 660 _factor_cache[n] = tmp[:] 661 return tmp 662 663
664 -def test_primes():
665 assert factor(11) == [11] 666 assert factor(16) == [2, 2, 2, 2] 667 assert factor(100) == [2, 2, 5, 5] 668 assert factor(2) == [2] 669 assert factor(97) == [97] 670 assert factor(55) == [5, 11] 671 assert gcd(5,3)==1 672 assert gcd(14, 21)==7 673 assert gcd(100, 70)==10
674
675 -def gcd(a, b):
676 """Greatest common factor/denominator. 677 @type a: int 678 @type b: int 679 @rtype: int 680 @return: the greatest common factor of a and b. 681 """ 682 assert a >= 0 and b >= 0 683 while b != 0: 684 tmp = b 685 b = a % b 686 a = tmp 687 return a
688 689 690
691 -def find_in_PATH(progname):
692 """Search PATH to find where a program resides. 693 @param progname: the program to look for. 694 @type progname: str 695 @return: the full path name. 696 @rtype: str 697 """ 698 for p in os.environ['PATH'].split(':'): 699 tmp = os.path.join(p, progname) 700 if os.access(tmp, os.R_OK|os.X_OK): 701 return tmp 702 return None
703 704 705
706 -def get_mtime(fn):
707 """Paired with L{need_to_recompute}(). These implement something like make, 708 where we figure out if we need to compute things based on the age of files. 709 This is used to get the age of the pre-requisites. 710 @param fn: a filename or a file 711 @type fn: str or file 712 @return: None (if the file doesn't exist) or its modification time. 713 @rtype: None or float 714 """ 715 try: 716 if isinstance(fn, file): 717 s = os.fstat(fn.fileno()) 718 else: 719 s = os.stat(fn) 720 except OSError: 721 return None 722 return s[stat.ST_MTIME]
723 724
725 -class PrereqError(ValueError):
726 - def __init__(self, *s):
727 ValueError.__init__(self, *s)
728
729 - def repl(self, x):
730 self.args = self.args[:-1] + (x,)
731 732
733 -def prereq_mtime(*tlist):
734 if None in tlist: 735 raise PrereqError("Prerequisite has not been computed yet" , tlist.index(None)) 736 return max(tlist)
737 738
739 -def need_to_recompute(fn, lazytime, size=-1):
740 """Paired with L{get_mtime}(). These implement something like make, 741 where we figure out if we need to compute things. 742 @param fn: a filename or a file 743 @type fn: C{str} or C{file} 744 @param lazytime: a time (as obtained from ST_MTIME in os.stat()). 745 If the file modification time of C{fn} is older than 746 C{lazytime}, recompute. 747 @type lazytime: C{None} or C{float} 748 @param size: recompute the file if it is smaller than C{size}. 749 Normally, this is used to recompute on empty output 750 files by setting C{size=0}. 751 @type size: int 752 @return: True if fn needs to be recomputed or if it doesn not exist. 753 @rtype: bool 754 """ 755 if lazytime is None and size<0: 756 return True 757 try: 758 if isinstance(fn, file): 759 s = os.fstat(fn.fileno()) 760 else: 761 s = os.stat(fn) 762 except OSError: 763 return True 764 if size and s[stat.ST_SIZE] <= size: 765 return True 766 return s[stat.ST_MTIME] <= lazytime
767 768
769 -def truncate(s, maxlen):
770 assert maxlen > 3 771 if len(s) < maxlen: 772 return s 773 return s[:maxlen-3] + '...'
774 775 776 777 #The algorithm comes from Handbook of Mathematical Functions, formula 7.1.26. 778 # Thanks to John D. Cook. 779 # http://stackoverflow.com/questions/457408/is-there-an-easily-available-implementation-of-erf-for-python
780 -def erf(x):
781 """erf(x)=(2/sqrt(pi))*integral{0 to x of exp(-t**2) dt}""" 782 # save the sign of x 783 sign = 1 784 if x < 0: 785 sign = -1 786 x = -x 787 788 # constants 789 a1 = 0.254829592 790 a2 = -0.284496736 791 a3 = 1.421413741 792 a4 = -1.453152027 793 a5 = 1.061405429 794 p = 0.3275911 795 796 # A&S formula 7.1.26 797 t = 1.0/(1.0 + p*x) 798 y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*math.exp(-x*x) 799 return sign*y # erf(-x) = -erf(x)
800 801 802
803 -def asinh(x):
804 """Inverse hyperbolic sine.""" 805 if x > 0: 806 sign = 1 807 elif x == 0: 808 sign = 0 809 else: 810 sign = -1 811 return sign*math.log(sign*x + math.sqrt(x*x + 1))
812 813
814 -def chooseP(x, p):
815 """Sample from a list with specified probabilities. 816 @param x: a list of things from which to sample 817 @type x: list(something) 818 @param p: a list of probabilities for sampling the corresponding 819 item of C{x}. 820 @type p: C{list(float)} 821 @return: a sample from C{x} 822 @rtype: whatever is inside C{x} 823 @raise AssertionError: Will (sometimes) detect negative probabilities, 824 or probabilities that sum to something other than one. 825 """ 826 import random 827 ps = 0.0 828 r = random.random() 829 for (xx, pp) in zip(x, p): 830 ps += pp 831 assert 0<=pp<=1.0 and ps<=1.0, "Bad probabilities" 832 if pp > r: 833 return xx 834 else: 835 r -= pp 836 raise AssertionError, "Probabilities sum to %g, not 1.0" % ps
837 838
839 -def misc_mode(lx):
840 """@return: The most common object from a list of arbitrary objects. 841 @param lx: a sequence of hashable objects. 842 """ 843 from gmisclib import dictops 844 c = dictops.dict_of_accums() 845 for x in lx: 846 c.add(x, 1) 847 vmax = 0 848 kmax = None 849 for (k, v) in c.items(): 850 if v > vmax: 851 kmax = k 852 vmax = v 853 if not (vmax > 0): 854 raise ValueError, "Empty list" 855 return kmax
856 857
858 -def distrib(key):
859 """ 860 @return: The release name of the linux distribution that you're running. 861 @rtype: str 862 """ 863 fname = '/etc/lsb-release' 864 for l in open(fname, 'r'): 865 l = l.strip() 866 x = l.split('=') 867 if len(x) == 2 and x[0]=='DISTRIB_%s' % key: 868 return x[1].strip() 869 raise KeyError, "Cannot find DISTRIB_%s in %s" % (key, fname)
870 871 872 873 if __name__ == '__main__': 874 testCM() 875 testSNR() 876 test_primes() 877