Package gmisclib :: Module entropy
[frames] | [no frames]

Source Code for Module gmisclib.entropy

  1  #!/usr/bin/env python 
  2   
  3  import math 
  4  import random 
  5  from gmisclib import Num 
  6  from gmisclib import mcmc 
  7  from gmisclib import mcmc_helper 
  8  import gpkavg 
  9  from gmisclib import gpkmisc 
 10   
def P(x):
    """Normalize a vector of non-negative weights into a probability
    distribution (components scaled so they sum to one).
    """
    assert Num.alltrue(x >= 0)
    total = Num.sum(x, axis=0)
    return x / total
15 16
def multinomial_logp(x, cF):
    """Unnormalized log-probability of observing the counts in cF[0]
    under a multinomial whose probabilities are x/sum(x), scaled by the
    factor cF[1], with a soft penalty pinning sum(x) near 1.

    Returns None when any component of x is non-positive (out of the
    valid domain), which the MCMC stepper treats as a rejected point.
    """
    counts, factor = cF
    assert len(x.shape) == 1
    assert len(counts.shape) == 1
    assert x.shape == counts.shape
    if Num.sometrue(x <= 0):
        # Outside the interior of the positive orthant: undefined log.
        return None
    total = Num.sum(x, axis=0)
    p = x / total
    assert len(p.shape) == 1
    # Multinomial coefficient: log( N! / prod(c_i!) ).
    log_comb = gpkmisc.log_factorial(Num.sum(counts, axis=0))
    for ci in counts:
        log_comb -= gpkmisc.log_factorial(ci)
    # Quadratic penalty in log(total); zero exactly when sum(x) == 1.
    penalty = x.shape[0] * math.log(total)**2
    return (Num.sum(Num.log(p)*counts, axis=0) + log_comb - penalty) * factor
33 34
def multinomial_fixer(x, c):
    """Force every component of x to be strictly positive by reflecting
    non-positive components about zero (plus a tiny offset), repeating
    until none remain.  Used as the `fixer` callback for the MCMC
    stepper; c is accepted for interface compatibility and unused.
    Mutates x in place and returns it.
    """
    TINY = 1e-8
    bad = (x <= 0.0)
    while Num.sometrue(bad):
        for idx in Num.nonzero(bad):
            # Reflect and nudge strictly positive: -x + TINY.
            x[idx] = TINY - x[idx]
        bad = (x <= 0.0)
    return x
41 42
def information_gained(c, pc=None):
    """Suppose there is an experiment where you have N conditions
    and in each conditions there are M possible measurements.
    The outcomes are represented by c[N,M] matrix of counts:
    how many times you observe each possible measurement for each
    condition.

    We want to ask what's the information gained by taking
    a measurement.  We assume the probability of
    each condition is a pc[N] vector, which defaults to the
    frequencies in c.  (We assume that the choice of condition is
    not a random variable, but is made in advance, like most
    experiments.)

    :param c: (N,M) array-like of non-negative integer counts.
    :param pc: optional length-N vector of condition probabilities;
            must sum to 1.  Defaults to smoothed row frequencies of c.
    :return: a (avg, sigma) tuple -- the Monte-Carlo estimate of the
            information gained and its uncertainty (from gpkavg.avg).
    """
    EPS = 1e-6
    # F is the scale factor handed to multinomial_logp via the stepper.
    F = 1.0

    c = Num.asarray(c, Num.Int)
    N,M = c.shape
    if pc is not None:
        pc = Num.asarray(pc, Num.Float)
    else:
        print 'c=', c
        print 'NS(c)', Num.sum(c+0.5, axis=1)
        # Default pc: add-0.5 smoothed frequency of each condition.
        pc = Num.sum(c+0.5, axis=1)/Num.sum(c+0.5)
        print 'pc=', pc
    assert pc.shape == (N,)
    assert abs(Num.sum(pc)-1) < EPS

    # K: number of Monte-Carlo entropy samples drawn below.
    # K2: number of posterior samples kept per condition from the chain.
    K = 2*Num.sum(c) + 10
    K2 = int(round(math.sqrt(K)) + 10)
    # Starting point for each chain: add-0.5 smoothed counts.
    # NOTE(review): axis=0 normalizes over conditions, not within each
    # condition's row (axis=1 would do that) -- possibly unintended, but
    # harmless since this is only an MCMC starting point; confirm.
    pstart = (0.5+c) / Num.sum(0.5+c, axis=0)
    tmpP = []
    for i in range(N):
        # Small isotropic proposal covariance in probability space.
        pV = 0.1*Num.identity(M)/float(M)**1.5
        xp = mcmc.bootstepper(multinomial_logp, pstart[i,:], pV,
                              c=(c[i,:],F), fixer=multinomial_fixer)
        s = mcmc_helper.stepper(xp)
        # Burn in to the high-probability region, then decorrelate
        # from the starting point before collecting samples.
        s.run_to_bottom()
        print 'RTB'
        s.run_to_ergodic(5.0)
        print 'RTE'
        tmp = []
        for j in range(K2):
            # One ergodic interval between kept samples.
            s.run_to_ergodic(1.0)
            tmp.append( P(xp.prms()) )
        tmpP.append(tmp)
        print 'Q'
    assert len(tmpP) == N
    # Monte-Carlo estimate of  H(marginal) - E_i[ H(measurement | i) ].
    tmpe = []
    for j in range(K):
        prior = Num.zeros((M,), Num.Float)
        epost = 0.0
        for i in range(N):
            # Draw one sampled measurement distribution for condition i.
            pi = random.choice(tmpP[i])
            assert abs(Num.sum(pi)-1.0) < 0.001
            # Accumulate the pc-weighted mixture in place.
            Num.add(prior, pi*pc[i], prior)
            # Expected conditional entropy term for condition i.
            epost -= pc[i]*Num.sum(Num.log(pi)*pi)
        assert abs(Num.sum(prior)-1.0) < 0.001
        # Entropy of the marginal (mixture) distribution.
        eprior = -Num.sum(Num.log(prior)*prior)
        tmpe.append( eprior - epost )
    print 'R'
    avg, sigma = gpkavg.avg(tmpe, None, 0.0001)
    return (avg, sigma)
107 108 if __name__ == '__main__': 109 try: 110 import psyco 111 psyco.full() 112 except ImportError: 113 pass 114 print information_gained( 115 [[0,0,10],[0,0,10],[0,0,10]] 116 ) 117