1 """Some of these functions, specifically f_value(), fprob(), betai(), and betacf(),
2 are taken from stats.py "A collection of basic statistical functions for python."
3 by Gary Strangman. They are copyright 1999-2007 Gary Strangman;
4 All Rights Reserved and released under the MIT license.
5
6 The rest is copyright Greg Kochanski 2009.
7 """
8
9 import math
10
11
13 """Returns an F-statistic given the following:
14 @param ER: error associated with the null hypothesis (the Restricted model)
15 @param EF: error associated with the alternate hypothesis (the Full model)
16 @param dfR: degrees of freedom the Restricted model (null hypothesis)
17 @param dfF: degrees of freedom associated with the Full model
18 @return: f-statistic (not the probability)
19 @rtype: float
20 """
21 if not ( ER >= EF ):
22 raise ValueError, "Error with the alternative hypothesis is larger: %s vs %s" % (ER, EF)
23 if not( dfR >= dfF ):
24 raise ValueError, "Degrees of freedom with the alternative hypothesis is larger: %s vs %s" % (dfR, dfF)
25 return ((ER-EF)/float(dfR-dfF) / (EF/float(dfF)))
26
27
28
29
30
31 -def fprob (dfnum, dfden, F):
32 """Returns the (1-tailed) significance level (p-value) of an F
33 statistic given the degrees of freedom for the numerator (dfR-dfF) and
34 the degrees of freedom for the denominator (dfF).
35
36 Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn
37 """
38 if not (dfnum>0 and dfden>0 and F>=0.0):
39 raise ValueError, "Bad arguments to fprob(%s, %s, %s)" % (dfnum, dfden, F)
40 p = betai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F))
41 return p
42
43
45 """Returns the incomplete beta function:
46
47 I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)
48
49 where a>=0,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma
50 function of a. The continued fraction formulation is implemented here,
51 using the betacf function. (Adapted from: Numerical Recipes in C.)
52
53 Usage: betai(a,b,x)
54 """
55 if not (a>=0 and b>=0):
56 raise ValueError, 'Bad a=%s or b=%s in betai' % (a, b)
57 if not (x>0.0 or x>1.0):
58 raise ValueError, 'Bad x in betai'
59 if (x==0.0 or x==1.0):
60 bt = 0.0
61 else:
62 bt = math.exp(gammaln(a+b)-gammaln(a)-gammaln(b)+a*math.log(x)+b*
63 math.log(1.0-x))
64 if (x<(a+1.0)/(a+b+2.0)):
65 return bt*betacf(a,b,x)/float(a)
66 else:
67 return 1.0-bt*betacf(b,a,1.0-x)/float(b)
68
69
70
71
73 """This function evaluates the continued fraction form of the incomplete
74 Beta function, betai. (Adapted from: Numerical Recipes in C.)
75
76 Usage: betacf(a,b,x)
77 """
78 ITMAX = 200
79 EPS = 3.0e-7
80
81 bm = az = am = 1.0
82 qab = a+b
83 qap = a+1.0
84 qam = a-1.0
85 bz = 1.0-qab*x/qap
86 for i in range(ITMAX+1):
87 em = float(i+1)
88 tem = em + em
89 d = em*(b-em)*x/((qam+tem)*(a+tem))
90 ap = az + d*am
91 bp = bz+d*bm
92 d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem))
93 app = ap+d*az
94 bpp = bp+d*bz
95 aold = az
96 am = ap/bpp
97 bm = bp/bpp
98 az = app/bpp
99 bz = 1.0
100 if (abs(az-aold)<(EPS*abs(az))):
101 return az
102 raise RuntimeError, 'a or b too big, or ITMAX too small in Betacf.'
103
104
105
107 """Returns the gamma function of xx.
108 Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt.
109 (Adapted from: Numerical Recipies in C. via code
110 Copyright (c) 1999-2000 Gary Strangman and released under the LGPL.)
111 """
112 if not (x > 0):
113 raise ValueError, "Range Error: Gamma(0) is undefined."
114 coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516,
115 0.120858003e-2, -0.536382e-5]
116 tmp = x + 4.5
117 tmp -= (x-0.5)*math.log(tmp)
118 ser = 1.0
119 for cj in coeff:
120 ser += cj/x
121 x += 1
122 return -tmp + math.log(2.50662827465*ser)
123
124
125
126
127
128
129
130
131
132
134 N = 30000
135
136
137 mean = float(dfden)/float(dfden-2)
138 sigma = math.sqrt(float(2*dfden**2*(dfden+dfnum-2))/
139 float(dfnum*(dfden-2)**2*(dfden-4)))
140 assert abs(fprob(dfnum, dfden, 0.0)-1.0) < 0.001
141 assert abs(fprob(10, 20, 10.0)-0.0) < 0.001
142 m = 0.0
143 ss = 0.0
144 lpc = 1.0
145 lf = 0.0
146 ps = 0.0
147 i = 0
148 while lpc > 1.0/float(N):
149 nf = (mean+3*sigma)*((i+0.5)/float(N))**2
150 f = 0.5*(lf+nf)
151 lf = nf
152 pc = fprob(dfnum, dfden, nf)
153 dp = lpc-pc
154 lpc = pc
155 m += f*dp
156 ss += f**2 * dp
157 ps += dp
158 i += 1
159 m += lpc*f
160 ss += lpc*f**2
161 assert abs(ps-1.0) < 0.001
162 s = math.sqrt(ss-m**2)
163 print 'm=', m, mean
164 print 's=', s, sigma
165 assert abs(m-mean) < 0.03*mean
166 assert abs(s-sigma) < 0.03*sigma
167
168
169 t_Table = [(1, 0.5, 1.000), (1, 0.95, 12.71), (1, 0.98, 31.82), (1, 0.99, 63.66),
170 (1, 0.995, 127.3), (1, 0.998, 318.3), (1, 0.999, 636.6),
171 (2, 0.5, 0.816), (2, 0.95, 4.303), (2, 0.98, 6.965), (2, 0.99, 9.925),
172 (2, 0.999, 31.60),
173 (3, 0.5, 0.765), (3, 0.95, 3.182), (3, 0.99, 5.841), (3, 0.999, 12.92),
174 (4, 0.5, 0.741), (4, 0.95, 2.776), (4, 0.99, 3.797), (4, 0.999, 8.610),
175 (10, 0.5, 0.700), (10, 0.95, 2.228), (10, 0.99, 3.106), (10, 0.999, 4.587),
176 (120, 0.5, 0.677), (10, 0.95, 1.980), (120, 0.99, 2.617), (120, 0.999, 3.373),
177 (1e6, 0.5, 0.674), (1e6, 0.95, 1.960), (1e6, 0.99, 2.576), (1e6, 0.999, 3.291)
178 ]
179
181 for (n, p2s, t) in t_Table:
182 if n==ndof and p2s==p2sided:
183 return t
184 t_lower = None
185 n_lower = None
186 t_higher = None
187 n_higher = None
188 for (n, p2s, t) in t_Table:
189 if p2s==p2sided and n<=ndof and (n_lower is None or n>n_lower):
190 n_lower = n
191 t_lower = t
192 if p2s==p2sided and n>=ndof and (n_higher is None or n<n_higher):
193 n_higher = n
194 t_higher = t
195 if n_lower is not None and n_higher is not None:
196 return t_lower + ((t_higher-t_lower)*(math.log(n)-math.log(n_lower))
197 / (math.log(n_higher)-math.log(n_lower)))
198 raise RuntimeError, "Sorry: not implemented yet"
199
200
201
203 """
204
205 Lower tail quantile for standard normal distribution function.
206
207 This function returns an approximation of the inverse cumulative
208 standard normal distribution function. I.e., given P, it returns
209 an approximation to the X satisfying P = Pr{Z <= X} where Z is a
210 random variable from the standard normal distribution.
211
212 The algorithm uses a minimax approximation by rational functions
213 and the result has a relative error whose absolute value is less
214 than 1.15e-9.
215
216 @author: Peter John Acklam
217 @note: Time-stamp: 2000-07-19 18:26:14
218 @contact: pjacklam@online.no
219 @see: http://home.online.no/~pjacklam
220
221 @type p: float (0,1)
222 @rtype: float
223 @raise ValueError: if the argument is out of range.
224 @note: Downloaded from http://home.online.no/~pjacklam/notes/invnorm/impl/field/ltqnorm.txt
225 GPK 4/22/2011. Documentation at http://home.online.no/~pjacklam/notes/invnorm/index.html,
226 which is part of this package at .../references/stats_invnorm_pjacklam_2011.html.
227 @contact: Greg Kochanski <gpk@kochanski.org>
228 @note: Modified from the author's original perl code (original comments follow below)
229 by dfield@yahoo-inc.com. May 3, 2004.
230 """
231
232 if p <= 0 or p >= 1:
233
234 raise ValueError( "Argument to ltqnorm %f must be in open interval (0,1)" % p )
235
236
237 a = (-3.969683028665376e+01, 2.209460984245205e+02, \
238 -2.759285104469687e+02, 1.383577518672690e+02, \
239 -3.066479806614716e+01, 2.506628277459239e+00)
240 b = (-5.447609879822406e+01, 1.615858368580409e+02, \
241 -1.556989798598866e+02, 6.680131188771972e+01, \
242 -1.328068155288572e+01 )
243 c = (-7.784894002430293e-03, -3.223964580411365e-01, \
244 -2.400758277161838e+00, -2.549732539343734e+00, \
245 4.374664141464968e+00, 2.938163982698783e+00)
246 d = ( 7.784695709041462e-03, 3.224671290700398e-01, \
247 2.445134137142996e+00, 3.754408661907416e+00)
248
249
250 plow = 0.02425
251 phigh = 1 - plow
252
253
254 if p < plow:
255 q = math.sqrt(-2*math.log(p))
256 return (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) / \
257 ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1)
258
259
260 if phigh < p:
261 q = math.sqrt(-2*math.log(1-p))
262 return -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) / \
263 ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1)
264
265
266 q = p - 0.5
267 r = q*q
268 return (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q / \
269 (((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1)
270
271
272
273 if __name__ == '__main__':
274
275 test_fprob(2, 8)
276 test_fprob(4, 8)
277 test_fprob(7, 8)
278 test_fprob(7, 20)
279 test_fprob(2, 20)
280 test_fprob(18, 20)
281