1
2 """Solves various equations involving minimizing the
3 sum of absolute values of things.
4 """
5
6
7 import math
8 import numpy
9 from gmisclib import Numeric_gpk
10 from gmisclib import root
11 from gmisclib import weighted_percentile as WP
12
14 """We minimize the equation sum_over_i(abs(y0_i*(1-x) + y1_i*x))
15 to find the best x. Returns x.
16 I don't know if this is strictly correct.
17 """
18 n = len(y0)
19 assert len(y1)==n
20 yy0 = numpy.asarray(y0)
21 yy1 = numpy.asarray(y1)
22 slp = yy1-yy0
23 zro = -yy0/slp
24 i = numpy.argsort(zro)
25 zros = numpy.take(zro, i, axis=0)
26 slps = numpy.absolute(numpy.take(slp, i, axis=0))
27 islps = numpy.add.accumulate(slps)
28 isl_tgt = 0.5*islps[-1]
29 minidx = numpy.searchsorted(islps, [isl_tgt])
30 return zros[minidx]
31
32
34 """Solves for the line yhat = m*x + b that minimizes
35 sum(abs(y - yhat)). In other words, it's a fit to a straight line,
36 but a highly robust fit that will not be disturbed by outliers.
37 The algorithm is from Numerical Recipes, Volume 2.
38 @param epsx: a tolerance used in the iterative solution.
39 Eps is roughly the required accuracy of the x-intercept of the solution,
40 expressed as a fraction of the x-range of the data.
41 @param epsm: a tolerance used in the iterative solution. Roughly,
42 it is the accuracy of C{m} in the solution. Note the tangent() call!
43 @type epsx: C{float}
44 @type epsm: C{float}
45 @return: (mhat, bhat), where mhat*x+bhat is the best fit to the data.
46 @rtype: C{tuple(float,float)}
47 """
48 assert epsx > 0.0
49 x = numpy.asarray(x)
50 xbar = numpy.average(x)
51 x = x - xbar
52 y = numpy.asarray(y)
53 ybar = numpy.average(y)
54 y = y - ybar
55
56
57 def b(x, y, m):
58 b = y - m*x
59 return numpy.median(b)
60
61 mp2 = math.pi / 2.0
62
63 def to_be_zeroed(theta, xy):
64 x, y = xy
65
66
67 if theta == mp2:
68
69 return numpy.sum(-numpy.sign(x)*x)
70 if theta == -mp2:
71
72 return numpy.sum(x * numpy.sign(x))
73
74 m = math.tan(theta)
75 bhat = b(x, y, m)
76 tmp = numpy.sum(x * numpy.sign(y - m*x - bhat) )
77
78 return tmp
79
80 xtol = epsx*numpy.sum(numpy.absolute(x))
81 mhat = math.tan( root.root(to_be_zeroed, -mp2, mp2, (x, y),
82 epsm, xtol
83 )
84 )
85 return (mhat, b(x, y, mhat)+ybar-mhat*xbar)
86
87
88
90 """Solves for the line yhat = m*x + b that minimizes
91 sum(abs(y - yhat)). In other words, it's a fit to a straight line,
92 but a highly robust fit that will not be disturbed by outliers.
93 The algorithm is from Numerical Recipes, Volume 2.
94 @param epsx: a tolerance used in the iterative solution.
95 Eps is roughly the required accuracy of the x-intercept of the solution,
96 expressed as a fraction of the x-range of the data.
97 @param epsm: a tolerance used in the iterative solution. Roughly,
98 it is the accuracy of C{m} in the solution. Note the tangent() call!
99 @type epsx: C{float}
100 @type epsm: C{float}
101 @return: (mhat, bhat), where mhat*x+bhat is the best fit to the data.
102 @rtype: C{tuple(float,float)}
103 """
104 assert epsx > 0.0
105 if not numpy.greater_equal(wt, 0.0).all():
106 raise ValueError, "Weights must be non-negative."
107 sw = numpy.sum(wt)
108 if not (sw > 0.0):
109 raise ValueError, "Weights are NaN or all zero"
110 x = numpy.asarray(x)
111 xbar = numpy.sum(x*wt)/sw
112 x = x - xbar
113 y = numpy.asarray(y)
114 ybar = numpy.sum(y*wt)/sw
115 y = y - ybar
116
117 def b(x, y, wt, m):
118 b = y - m*x
119 return WP.wp(b, wt, [0.5])[0]
120
121 mp2 = math.pi / 2.0
122
123 def to_be_zeroed(theta, xyw):
124 x, y, wt = xyw
125
126
127 if theta == mp2:
128
129 return numpy.sum(-numpy.sign(x)*wt*x)/sw
130 if theta == -mp2:
131
132 return numpy.sum(x * numpy.sign(x) * wt)/sw
133
134 m = math.tan(theta)
135 bhat = b(x, y, wt, m)
136 tmp = numpy.sum(x * numpy.sign(y - m*x - bhat) * wt )/sw
137
138 return tmp
139
140 xtol = epsx * numpy.sum(numpy.absolute(x)*wt) / sw
141 mhat = math.tan( root.root(to_be_zeroed, -mp2, mp2, (x, y, wt),
142 epsm, xtol
143 )
144 )
145 return (mhat, b(x, y, wt, mhat)+ybar-mhat*xbar)
146
147
148
150 assert solve1([-1], [1]) == 0
151 assert solve1([0.5, 1], [0.1, 0]) == 1
152 assert solve1([1.1, 0.6], [0, 0.1]) == 1
153
154
156 m, b = solve_fit([0,1], [0,1])
157 assert abs(b-0.0)<0.0001 and abs(m-1.0)<0.0001
158 m, b = solve_fit([0,0], [1,1])
159 assert abs(b-1.0)<0.0001
160 m,b = solve_fit([0,1], [0,0.5])
161 assert abs(b-0.0)<0.0001 and abs(m-0.5)<0.0001
162 m,b = solve_fit([0,1], [1,3])
163 assert abs(b-1.0)<0.0001 and abs(m-2.0)<0.0001
164 m,b = solve_fit([0,1,2], [1,-1,-3])
165 assert abs(b-1.0)<0.0001 and abs(m+2.0)<0.0001
166 m,b = solve_fit([0.0, 1.0, 1.0, 2.0], [0.0, 0.0, 2.0, 2.0])
167 assert abs(b-0.0)<0.0001 and abs(m-1.0)<0.0001
168 m,b = solve_fit([0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0], [0.0, 1.0, 0.9, 2.0, 1.1, 2.0, 3.0, 4.0])
169 assert abs(b-1.0)<0.0001 and abs(m-1.0)<0.0001
170 m,b = solve_fit([0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 6.0, 7.0], [0.0, 1.0, 0.9, 2.0, 1.1, 6.0, 7.0, 8.0])
171 assert abs(b-1.0)<0.0001 and abs(m-1.0)<0.0001
172 m,b = solve_fit([0.0, 0.0, 0.0, 5.0, 6.0, 7.0], [1.0, 1.1, 0.9, 6.0, 7.0, 8.0])
173 assert abs(b-1.0)<0.0001 and abs(m-1.0)<0.0001
174
175 if __name__ == '__main__':
176 test1()
177 test2()
178