1 from __future__ import with_statement
2 import os
3 import sys
4 import math
5 import time
6 import stat
7 import errno
8
9 import die
10
11 try:
12 from Numeric_gpk import N_maximum, N_minimum, N_median, N_mean_ad, \
13 variance, stdev, \
14 set_diag, \
15 make_diag, limit, vec_variance, qform, \
16 KolmogorovSmirnov, interp, interpN
17 except ImportError, _x:
18 if str(_x).startswith('cannot import name'):
19 raise
20 pass
21
22
32
33
50
51
53 s = 0.0
54 n = 0
55 for t in x:
56 s += t
57 n += 1
58 return s/float(n)
59
60
65 mad = median_ad
66
67
75
76
78 """
79 @param x: A list of data to average.
80 @type x: list(float), typically.
81 @return: the mean and standard deviation of the input list.
82 If there is only one datum, the standard deviation is
83 reported as None.
84 @rtype: (float, float) or (float, None)
85 @raise ValueError: If there is no data.
86 """
87 sum = 0.0
88 n = len(x)
89 if not ( n>1 ):
90 raise ValueError, "Not enough data: n=0"
91 for t in x:
92 sum += t
93 mean = float(sum)/n
94 ss = 0.0
95 for t in x:
96 tmp = t - mean
97 ss += tmp*tmp
98 return (mean, math.sqrt(ss/(n-1)) if n>1 else None)
99
100
101
108
109
111 """
112 @raise ValueError: if any argument is negative.
113 @return: Geometric mean of its arguments.
114 @rtype: float
115 """
116 s = 0.0
117 for t in d:
118 if t == 0.0:
119 return 0.0
120 elif t > 0.0:
121 s += math.log(t)
122 else:
123 raise ValueError, "Negative/NaN argument to geo_mean: %s" % str(t)
124 return math.exp(s/len(d))
125
126
127
129 """Compute the entropy of a list of probabilities."""
130 e = 0.0
131 ps = 0.0
132 for p in x:
133 assert p <= 1.0
134 if p > 0.0:
135 e -= p*math.log(p)
136 ps += p
137 assert 0.999 < ps < 1.001, "Probabilities must sum to one."
138 return e
139
140
142 """Bootstrap resampling. Call this many times: each one
143 returns a random resampling.
144 """
145 import random
146 o = []
147 for i in range(len(d)):
148 o.append( random.choice(d) )
149 return o
150
151
153 """Jackknife resampling. Call this once.
154 It returns a list of deleted lists."""
155 for drop in range(len(d)):
156 yield [ di for (i, di) in enumerate(d) if i!=drop ]
157
158
159
160
161
162
164 """From p.337 Statistical Theory by Bernard Lindgren."""
165
166 from transcendental import gamma
167
168 p = gamma((n+1)/2.0) * (1+x*x/n)**(-(n+1)/2.0) / (
169 math.sqrt(n*math.pi)*gamma(n/2.0)
170 )
171 return p
172
173
175 """From p.337 Statistical Theory by Bernard Lindgren."""
176 assert n > 0
177 from gmisclib import stats
178
179
180
181
182 lp = math.log(1+x*x/n)*(-(n+1)/2.0)
183 lp += stats.gammaln((n+1)/2.0) - stats.gammaln(n/2.0)
184 lp -= 0.5*math.log(n*math.pi)
185 return lp
186
187
188
189
190 _fcache = {}
192 assert n >= 0
193 try:
194 lf = _fcache[n]
195 except KeyError:
196 lf = 0.0
197 for i in range(2,n):
198 lf += math.log(i)
199 _fcache[n] = lf
200 return lf
201
202
207
276
277
278
280 def eq(a, b):
281 tmp = abs(a-b)/(abs(a)+abs(b)) < 1e-6
282 if not tmp:
283 print 'eq fails: %s vs %s' % (str(a), str(b))
284 return tmp
285 print 'CM'
286 assert eq(ComplexMedian([complex(1,0), complex(2,0), complex(3,0)]), 2)
287 assert eq(ComplexMedian([complex(1), complex(2), complex(3), complex(4)]), 2.5)
288 assert eq(ComplexMedian([complex(1), complex(2), complex(3), complex(4), complex(5)]), 3)
289 assert eq(ComplexMedian([complex(1), complex(2), complex(3), complex(4), complex(4)]), 3)
290 assert eq(ComplexMedian([complex(1,1), complex(2,2), complex(3,3), complex(4,4)]),
291 complex(2.5,2.5))
292 assert eq(ComplexMedian([complex(0,0), complex(1,0), complex(0,1), complex(1,1)]),
293 complex(0.5,0.5))
294 assert eq( ComplexMedian([complex(0,0), complex(1,0), complex(0,1),
295 complex(1,1), complex(1,1)]),
296 complex(1,1))
297 assert eq( ComplexMedian([complex(0,0), complex(1,0), complex(0,1),
298 complex(1,1), complex(0.6,0.6)]),
299 complex(0.6,0.6))
300 assert eq(ComplexMedian([complex(0,0), complex(1,0), complex(0,1), complex(1,1),
301 complex(0,0), complex(2,0), complex(0,2), complex(2,2)]),
302 complex(0.5,0.5))
303 assert eq(ComplexMedian([complex(0,0), complex(1,0), complex(0,1)]),
304 complex(1./3., 1./3.))
305 assert eq(ComplexMedian([complex(0,0), complex(1,0), complex(0,1),
306 complex(0,0), complex(2,0), complex(0,2)]),
307 complex(0.3, 0.3))
308 import cmath
309 for N in [3, 4, 5, 6, 7, 8, 13, 40, 100, 2351]:
310 print 'N=', N
311 assert eq(ComplexMedian( [ 1+cmath.exp(2*cmath.pi*1j*float(q)/N)
312 for q in range(N) ]),
313 complex(1.0, 0.0)
314 )
315
316
317
318
319
320 import Queue
321 import threading
323 QSIZE = 100
324 _string = type('')
325
327 self.q = Queue.Queue(self.QSIZE)
328
329 def rhelper(fd, q):
330 try:
331 for l in fd:
332 q.put(l)
333 q.put(None)
334
335
336
337
338
339
340
341
342 except (Exception, KeyboardInterrupt):
343 q.put( sys.exc_info() )
344
345 self.rh = threading.Thread(target=rhelper, args=(fd, self.q))
346 self.rh.start()
347
348
350 if self.q is None:
351 return ''
352 x = self.q.get()
353 if type(x) != self._string:
354 self.rh.join()
355 self.q = None
356 if x is not None:
357 raise x[0], x[1], x[2]
358 return ''
359 return x
360
361
363 o = []
364 while self.q is not None:
365 x = self.q.get()
366 if type(x) != self._string:
367 self.rh.join()
368 self.q = None
369 if x is not None:
370 raise x[0], x[1], x[2]
371 break
372 o.append( x )
373 return o
374
375
377 while self.q is not None:
378 x = self.q.get()
379 if type(x) != self._string:
380 self.rh.join()
381 self.q = None
382 if x is not None:
383 raise x[0], x[1], x[2]
384 break
385 yield x
386
387 __iter__ = read_iter
388
389
391 """Read the contents of a file as an iterator.
392 The read is two-threaded, so that one thread can be
393 waiting on disk I/O while the other thread is
394 processing the results.
395 """
396 x = threaded_readable_file(fd)
397 return x.read_iter()
398
399
400
402 """This makes the specified directory, including all
403 necessary directories above it. It is like os.makedirs(),
404 except that if the directory already exists
405 it does not raise an exception.
406 @param fname:Name of the directory to create.
407 @param mode: Linux file permissions for any directories it needs to create.
408 @type fname: L{str}
409 @type mode: L{int}
410 @note: If the directory already exists, it does not force it to
411 have the specified C{mode}.
412 @raise OSError: If it cannot create a part of the directory chain.
413 """
414
415 c = fname.split('/')
416 for i in range(1+(c[0]==''), len(c)+1):
417 p = '/'.join(c[:i])
418 try:
419 os.mkdir(p, mode)
420 except OSError, e:
421 if e.errno != errno.EEXIST:
422 raise
423
424
425
427 """Shuffle a list, y,
428 so that no item occurs more than n times in a row.
429 Equality is determined by the comparison function compare returning zero.
430 """
431 import random
432 assert n>0, "Silly!"
433 x = list(y)
434 random.shuffle(x)
435 m = len(x)
436 if compare is None:
437 compare = lambda a, b: a==b
438
439 passes = 0
440 restart = 0
441 while passes<1000:
442 prb = None
443 pstart = 0
444 reps = 0
445
446
447 for i in range(max(1, restart), m):
448 if compare(x[i-1], x[i]):
449 reps += 1
450 pstart = i - 1
451 else:
452 pstart = None
453 reps = 0
454 if reps >= n:
455
456 prb = i
457 break
458 if prb is None:
459
460 y[:] = x
461 return y
462
463 k = prb+1
464
465
466 found = False
467 while k < m:
468 if not compare(x[pstart], x[k]):
469 found = True
470 break
471 k += 1
472 if not found:
473
474
475
476 tmp = x.pop(0)
477 x.append(tmp)
478 restart = 0
479 continue
480
481
482 a = pstart
483 b = k + 1
484
485 tmp = x[a:b]
486 random.shuffle(tmp)
487
488 x[a:b] = tmp
489 restart = a
490 passes += 1
491
492 raise RuntimeError, 'Too many passes: cannot avoid repetitions'
493
494
496 import random
497 for i in range(20):
498 x = [ i//10 for i in range(20) ]
499 shuffle_Nrep(x, 1)
500 for i in range(len(x)-1):
501 assert x[i] != x[i+1], 'Whoops: i=%d, %d %d' % (i, x[i], x[i+1])
502 x = [ i//3 for i in range(10000) ]
503 random.shuffle(x)
504 shuffle_Nrep(x, 1)
505 for i in range(len(x)-1):
506 assert x[i] != x[i+1], 'Whoops: i=%d, %d %d' % (i, x[i], x[i+1])
507
508
509
512 self.maxtries = 10
513 self.name = lockname
514 self.pid = os.getpid()
515 self.sleep = 2.0
516
518 locktries = 0
519 d = os.path.dirname(self.name)
520 if not d:
521 d = '.'
522 assert os.path.isdir(d), "%s is not a directory" % d
523 assert os.access(d, os.W_OK|os.X_OK), "%s is not writeable" % d
524 errs = set()
525 while locktries < self.maxtries:
526 try:
527 os.mkdir(self.name)
528 except OSError, x:
529 errs.add(str(x))
530 locktries += 1
531 time.sleep(self.sleep * (((self.pid+97*locktries)%197) / 197.0)**2)
532 else:
533 d = None
534 break
535 if d is not None:
536 die.warn("Could not acquire lock %s in %d tries (%s): continuing anyway." % (self.name, self.maxtries, ','.join(sorted(errs))))
537 self.name = None
538 return self
539
540 - def __exit__(self, exc_type, exc_value, traceback):
541 if self.name:
542 os.rmdir(self.name)
543
544
545
547 """Open a file (typically for writing) but make sure that the
548 file doesn't already exist. The name is constructed from
549 nm1, a sequence number, and nm2. The sequence number gets incremented
550 until a name is found that doesn't exist.
551 This works by creating a directory as a lock file; it should be safe across
552 NFS.
553 @note: The directory containing C{nm1} must exist and be writeable.
554 @param nm1: the part of the name to the left of the sequence number
555 @param nm2: the part of the name to the right of the sequence number
556 Typically, nm2 is a suffix like ".wav". This may not contain a slash.
557 @param mode: The way to open the file -- passed to open().
558 @type nm1: str
559 @type nm2: str
560 @type mode: str
561 @rtype C{file}
562 @return: The opened L{file} object. (Its name can be gotten from the C{name} attribute.)
563 """
564 dname = os.path.dirname(nm1)
565 lockf = os.path.join(dname, ".gmisclib.gpkmisc.nowipe_lock")
566 with dir_lock(lockf):
567 i = 0
568 try:
569 while True:
570 nm = '%s%d%s' % (nm1, i, nm2)
571 open(nm, 'r')
572 i += 1
573 except IOError:
574 pass
575 return open(nm, mode)
576
577
579 """Drops the prefix from the string and raises an exception
580 if it is not there to be dropped.
581 """
582 if not s.startswith(prefix):
583 raise ValueError, "String '%s' must start with '%s'" % (s[:20], prefix)
584 return s[len(prefix):]
585
586
588 import g_pipe
589
590 if fn.endswith('.bz2'):
591 ci, co = g_pipe.popen2("bzcat", ['bzcat', fn])
592 ci.close()
593 return co
594 elif fn.endswith('.gz'):
595 ci, co = g_pipe.popen2("zcat", ['zcat', fn])
596 ci.close()
597 return co
598 return open(fn, 'r')
599
600
602 raise RuntimeError, "Please import gammaln from gmisclib.stats"
603
604 _a_factor_cache = {1:1, 2:2, 3:3}
606 """Finds the smallest prime factor of a number."""
607
608 try:
609 return _a_factor_cache[n]
610 except KeyError:
611 pass
612 for p in primes():
613 if p*p > n:
614 f = n
615 break
616 if n%p == 0:
617 f = p
618 break
619 _a_factor_cache[n] = f
620 return f
621
622
623 _primes = [2, 3]
625 """This is a generator that produces an infinite list of primes.
626 """
627 for p in _primes:
628 yield p
629 i = _primes[-1] + 2
630 while True:
631 for p in primes():
632 if p*p > i:
633 if p > _primes[-1]:
634 _primes.append(i)
635 yield i
636 if i%p == 0:
637 i += 2
638 break
639
640
641 _factor_cache = {}
643 """Factor a number into a list of prime factors,
644 in increasing order.
645 @param n: input number
646 @type n: int
647 @return: prime factors
648 @rtype: list
649 """
650
651 try:
652 return _factor_cache[n]
653 except KeyError:
654 pass
655 f = a_factor(n)
656 if f == n:
657 tmp = [n]
658 else:
659 tmp = [f] + factor(n//f)
660 _factor_cache[n] = tmp[:]
661 return tmp
662
663
665 assert factor(11) == [11]
666 assert factor(16) == [2, 2, 2, 2]
667 assert factor(100) == [2, 2, 5, 5]
668 assert factor(2) == [2]
669 assert factor(97) == [97]
670 assert factor(55) == [5, 11]
671 assert gcd(5,3)==1
672 assert gcd(14, 21)==7
673 assert gcd(100, 70)==10
674
676 """Greatest common factor/denominator.
677 @type a: int
678 @type b: int
679 @rtype: int
680 @return: the greatest common factor of a and b.
681 """
682 assert a >= 0 and b >= 0
683 while b != 0:
684 tmp = b
685 b = a % b
686 a = tmp
687 return a
688
689
690
692 """Search PATH to find where a program resides.
693 @param progname: the program to look for.
694 @type progname: str
695 @return: the full path name.
696 @rtype: str
697 """
698 for p in os.environ['PATH'].split(':'):
699 tmp = os.path.join(p, progname)
700 if os.access(tmp, os.R_OK|os.X_OK):
701 return tmp
702 return None
703
704
705
707 """Paired with L{need_to_recompute}(). These implement something like make,
708 where we figure out if we need to compute things based on the age of files.
709 This is used to get the age of the pre-requisites.
710 @param fn: a filename or a file
711 @type fn: str or file
712 @return: None (if the file doesn't exist) or its modification time.
713 @rtype: None or float
714 """
715 try:
716 if isinstance(fn, file):
717 s = os.fstat(fn.fileno())
718 else:
719 s = os.stat(fn)
720 except OSError:
721 return None
722 return s[stat.ST_MTIME]
723
724
728
730 self.args = self.args[:-1] + (x,)
731
732
734 if None in tlist:
735 raise PrereqError("Prerequisite has not been computed yet" , tlist.index(None))
736 return max(tlist)
737
738
740 """Paired with L{get_mtime}(). These implement something like make,
741 where we figure out if we need to compute things.
742 @param fn: a filename or a file
743 @type fn: C{str} or C{file}
744 @param lazytime: a time (as obtained from ST_MTIME in os.stat()).
745 If the file modification time of C{fn} is older than
746 C{lazytime}, recompute.
747 @type lazytime: C{None} or C{float}
748 @param size: recompute the file if it is smaller than C{size}.
749 Normally, this is used to recompute on empty output
750 files by setting C{size=0}.
751 @type size: int
752 @return: True if fn needs to be recomputed or if it doesn not exist.
753 @rtype: bool
754 """
755 if lazytime is None and size<0:
756 return True
757 try:
758 if isinstance(fn, file):
759 s = os.fstat(fn.fileno())
760 else:
761 s = os.stat(fn)
762 except OSError:
763 return True
764 if size and s[stat.ST_SIZE] <= size:
765 return True
766 return s[stat.ST_MTIME] <= lazytime
767
768
770 assert maxlen > 3
771 if len(s) < maxlen:
772 return s
773 return s[:maxlen-3] + '...'
774
775
776
777
778
779
781 """erf(x)=(2/sqrt(pi))*integral{0 to x of exp(-t**2) dt}"""
782
783 sign = 1
784 if x < 0:
785 sign = -1
786 x = -x
787
788
789 a1 = 0.254829592
790 a2 = -0.284496736
791 a3 = 1.421413741
792 a4 = -1.453152027
793 a5 = 1.061405429
794 p = 0.3275911
795
796
797 t = 1.0/(1.0 + p*x)
798 y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*math.exp(-x*x)
799 return sign*y
800
801
802
804 """Inverse hyperbolic sine."""
805 if x > 0:
806 sign = 1
807 elif x == 0:
808 sign = 0
809 else:
810 sign = -1
811 return sign*math.log(sign*x + math.sqrt(x*x + 1))
812
813
815 """Sample from a list with specified probabilities.
816 @param x: a list of things from which to sample
817 @type x: list(something)
818 @param p: a list of probabilities for sampling the corresponding
819 item of C{x}.
820 @type p: C{list(float)}
821 @return: a sample from C{x}
822 @rtype: whatever is inside C{x}
823 @raise AssertionError: Will (sometimes) detect negative probabilities,
824 or probabilities that sum to something other than one.
825 """
826 import random
827 ps = 0.0
828 r = random.random()
829 for (xx, pp) in zip(x, p):
830 ps += pp
831 assert 0<=pp<=1.0 and ps<=1.0, "Bad probabilities"
832 if pp > r:
833 return xx
834 else:
835 r -= pp
836 raise AssertionError, "Probabilities sum to %g, not 1.0" % ps
837
838
840 """@return: The most common object from a list of arbitrary objects.
841 @param lx: a sequence of hashable objects.
842 """
843 from gmisclib import dictops
844 c = dictops.dict_of_accums()
845 for x in lx:
846 c.add(x, 1)
847 vmax = 0
848 kmax = None
849 for (k, v) in c.items():
850 if v > vmax:
851 kmax = k
852 vmax = v
853 if not (vmax > 0):
854 raise ValueError, "Empty list"
855 return kmax
856
857
859 """
860 @return: The release name of the linux distribution that you're running.
861 @rtype: str
862 """
863 fname = '/etc/lsb-release'
864 for l in open(fname, 'r'):
865 l = l.strip()
866 x = l.split('=')
867 if len(x) == 2 and x[0]=='DISTRIB_%s' % key:
868 return x[1].strip()
869 raise KeyError, "Cannot find DISTRIB_%s in %s" % (key, fname)
870
871
872
873 if __name__ == '__main__':
874 testCM()
875 testSNR()
876 test_primes()
877