"""Estimate the entropy of the probability distribution that
produced a given sample.
"""

import math

import numpy

import gpkavg
import kl_dist
import mcmc
import mcmc_helper


def entropy_probs(p):
    """Entropy of a probability distribution p.

    Note the sign convention: this returns numpy.sum(p*numpy.log(p)),
    scaled to bits, which is the *negative* of the entropy;
    entropy_vec() negates it before returning.
    """
    return numpy.sum(p * numpy.log(p)) / math.log(2)
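

# A quick sanity check of the sign convention (an addition, not part of
# the original module): for a uniform distribution over four outcomes
# the entropy is 2 bits, so entropy_probs() should return -2.0.
def _check_entropy_probs_sign():
    p = numpy.ones(4) / 4.0
    assert abs(entropy_probs(p) + 2.0) < 1e-9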


def entropy_vec(p, N=None, F=1.0, Clip=0.0):
    """Entropy of a frequency distribution p.

    Here, p is assumed to be a vector of counts derived from
    multinomially distributed data; the counts are not normalized
    to sum to one.  The entropy is estimated by Monte Carlo
    integration over the posterior distribution of the underlying
    multinomial probabilities, and a (value, uncertainty) pair
    is returned.

    NOTE: the original 'def' line was lost; the defaults assumed
    here for F and Clip are guesses.
    """
    p = numpy.asarray(p, int)
    if N is None:
        N = p.shape[0] ** 2 * 30
    assert numpy.sum(p) > 0
    # Start at the posterior mean under a Jeffreys-style prior
    # (a pseudocount of 0.5 in each bin).
    pstart = (0.5 + p) / numpy.sum(0.5 + p)
    # Initial proposal covariance for the Markov-chain sampler.
    pV = 0.1 * numpy.identity(p.shape[0]) / float(p.shape[0]) ** 1.5
    xp = mcmc.bootstepper(kl_dist.multinomial_logp, pstart, pV,
                          c=(p, F), fixer=kl_dist.multinomial_fixer)
    mcmch = mcmc_helper.stepper(xp)
    mcmch.run_to_bottom()
    mcmch.run_to_ergodic(5.0)
    # Collect N entropy samples, advancing the chain by a fraction of
    # an ergodic time between samples.
    o = []
    while len(o) < N:
        mcmch.run_to_ergodic(1.0 / math.sqrt(N))
        o.append(entropy_probs(kl_dist.P(xp.prms())))
    avg, sigma = gpkavg.avg(o, None, Clip)
    return (-avg, sigma)
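

# A minimal cross-check sketch (an addition, not part of the original
# module).  It assumes kl_dist.multinomial_logp is a plain multinomial
# log-likelihood combined with the Jeffreys-style pseudocount of 0.5
# used above; under that assumption the posterior over the multinomial
# parameters is Dirichlet(counts + 0.5), so the entropy can also be
# estimated by direct sampling, with no Markov chain needed:
def _dirichlet_entropy_check(counts, n=1000):
    samples = numpy.random.dirichlet(numpy.asarray(counts) + 0.5, size=n)
    ent = numpy.array([-entropy_probs(s) for s in samples])
    # Mean entropy in bits, and the standard error of that mean.
    return (numpy.mean(ent), numpy.std(ent) / math.sqrt(n))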


if __name__ == '__main__':
    print(entropy_vec([100, 100, 100.0, 100.0]))
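    # For comparison (an added check, not part of the original module):
    # the naive plug-in estimate, i.e. the entropy of the normalized
    # counts.  With equal counts in four bins this is exactly 2 bits;
    # the estimate printed above should be close to it.
    counts = numpy.asarray([100.0, 100.0, 100.0, 100.0])
    print(-entropy_probs(counts / numpy.sum(counts)))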