gmisclib.solve_sum

1 # -*- coding: utf-8 -*- 2 """Solves various equations involving minimizing the 3 sum of absolute values of things. 4 """ 5 6 7 import math 8 import numpy 9 from gmisclib import Numeric_gpk 10 from gmisclib import root 11 from gmisclib import weighted_percentile as WP 12

13 -def solve1(y0, y1):

14 """We minimize the equation sum_over_i(abs(y0_i*(1-x) + y1_i*x)) 15 to find the best x. Returns x. 16 I don't know if this is strictly correct. 17 """ 18 n = len(y0) 19 assert len(y1)==n 20 yy0 = numpy.asarray(y0) 21 yy1 = numpy.asarray(y1) 22 slp = yy1-yy0 23 zro = -yy0/slp 24 i = numpy.argsort(zro) 25 zros = numpy.take(zro, i, axis=0) 26 slps = numpy.absolute(numpy.take(slp, i, axis=0)) 27 islps = numpy.add.accumulate(slps) 28 isl_tgt = 0.5*islps[-1] 29 minidx = numpy.searchsorted(islps, [isl_tgt]) 30 return zros[minidx]

31 32

33 -def solve_fit(x, y, epsx=1e-7, epsm=1e-7):

34 """Solves for the line yhat = m*x + b that minimizes 35 sum(abs(y - yhat)). In other words, it's a fit to a straight line, 36 but a highly robust fit that will not be disturbed by outliers. 37 The algorithm is from Numerical Recipes, Volume 2. 38 @param epsx: a tolerance used in the iterative solution. 39 Eps is roughly the required accuracy of the x-intercept of the solution, 40 expressed as a fraction of the x-range of the data. 41 @param epsm: a tolerance used in the iterative solution. Roughly, 42 it is the accuracy of C{m} in the solution. Note the tangent() call! 43 @type epsx: C{float} 44 @type epsm: C{float} 45 @return: (mhat, bhat), where mhat*x+bhat is the best fit to the data. 46 @rtype: C{tuple(float,float)} 47 """ 48 assert epsx > 0.0 49 x = numpy.asarray(x) 50 xbar = numpy.average(x) 51 x = x - xbar 52 y = numpy.asarray(y) 53 ybar = numpy.average(y) 54 y = y - ybar 55 56 57 def b(x, y, m): 58 b = y - m*x 59 return numpy.median(b)

60 61 mp2 = math.pi / 2.0 62 63 def to_be_zeroed(theta, xy): 64 x, y = xy 65 # print 'SSA: x=', x 66 # print 'SSA: y=', y 67 if theta == mp2: 68 # print 'SSA: tmp=', numpy.sum(x* -numpy.sign(x)), 'theta=', theta 69 return numpy.sum(-numpy.sign(x)*x) 70 if theta == -mp2: 71 # print 'SSA: tmp=', numpy.sum(x*numpy.sign(x)), 'theta=', theta 72 return numpy.sum(x * numpy.sign(x)) 73 74 m = math.tan(theta) 75 bhat = b(x, y, m) 76 tmp = numpy.sum(x * numpy.sign(y - m*x - bhat) ) 77 # print 'SSA: tmp=', tmp, 'm=', m, 'bhat=', bhat 78 return tmp 79 80 xtol = epsx*numpy.sum(numpy.absolute(x)) 81 mhat = math.tan( root.root(to_be_zeroed, -mp2, mp2, (x, y), 82 epsm, xtol 83 ) 84 ) 85 return (mhat, b(x, y, mhat)+ybar-mhat*xbar) 86 87 88

89 -def solve_fit_wt(x, y, wt, epsx=1e-7, epsm=1e-7):

90 """Solves for the line yhat = m*x + b that minimizes 91 sum(abs(y - yhat)). In other words, it's a fit to a straight line, 92 but a highly robust fit that will not be disturbed by outliers. 93 The algorithm is from Numerical Recipes, Volume 2. 94 @param epsx: a tolerance used in the iterative solution. 95 Eps is roughly the required accuracy of the x-intercept of the solution, 96 expressed as a fraction of the x-range of the data. 97 @param epsm: a tolerance used in the iterative solution. Roughly, 98 it is the accuracy of C{m} in the solution. Note the tangent() call! 99 @type epsx: C{float} 100 @type epsm: C{float} 101 @return: (mhat, bhat), where mhat*x+bhat is the best fit to the data. 102 @rtype: C{tuple(float,float)} 103 """ 104 assert epsx > 0.0 105 if not numpy.greater_equal(wt, 0.0).all(): 106 raise ValueError, "Weights must be non-negative." 107 sw = numpy.sum(wt) 108 if not (sw > 0.0): 109 raise ValueError, "Weights are NaN or all zero" 110 x = numpy.asarray(x) 111 xbar = numpy.sum(x*wt)/sw 112 x = x - xbar 113 y = numpy.asarray(y) 114 ybar = numpy.sum(y*wt)/sw 115 y = y - ybar 116 117 def b(x, y, wt, m): 118 b = y - m*x 119 return WP.wp(b, wt, [0.5])[0]

120 121 mp2 = math.pi / 2.0 122 123 def to_be_zeroed(theta, xyw): 124 x, y, wt = xyw 125 # print 'SSA: x=', x 126 # print 'SSA: y=', y 127 if theta == mp2: 128 # print 'SSA: tmp=', numpy.sum(x* -numpy.sign(x)), 'theta=', theta 129 return numpy.sum(-numpy.sign(x)*wt*x)/sw 130 if theta == -mp2: 131 # print 'SSA: tmp=', numpy.sum(x*numpy.sign(x)), 'theta=', theta 132 return numpy.sum(x * numpy.sign(x) * wt)/sw 133 134 m = math.tan(theta) 135 bhat = b(x, y, wt, m) 136 tmp = numpy.sum(x * numpy.sign(y - m*x - bhat) * wt )/sw 137 # print 'SSA: tmp=', tmp, 'm=', m, 'bhat=', bhat 138 return tmp 139 140 xtol = epsx * numpy.sum(numpy.absolute(x)*wt) / sw 141 mhat = math.tan( root.root(to_be_zeroed, -mp2, mp2, (x, y, wt), 142 epsm, xtol 143 ) 144 ) 145 return (mhat, b(x, y, wt, mhat)+ybar-mhat*xbar) 146 147 148

149 -def test1():

150 assert solve1([-1], [1]) == 0 151 assert solve1([0.5, 1], [0.1, 0]) == 1 152 assert solve1([1.1, 0.6], [0, 0.1]) == 1

153 154

155 -def test2():

156 m, b = solve_fit([0,1], [0,1]) 157 assert abs(b-0.0)<0.0001 and abs(m-1.0)<0.0001 158 m, b = solve_fit([0,0], [1,1]) 159 assert abs(b-1.0)<0.0001 160 m,b = solve_fit([0,1], [0,0.5]) 161 assert abs(b-0.0)<0.0001 and abs(m-0.5)<0.0001 162 m,b = solve_fit([0,1], [1,3]) 163 assert abs(b-1.0)<0.0001 and abs(m-2.0)<0.0001 164 m,b = solve_fit([0,1,2], [1,-1,-3]) 165 assert abs(b-1.0)<0.0001 and abs(m+2.0)<0.0001 166 m,b = solve_fit([0.0, 1.0, 1.0, 2.0], [0.0, 0.0, 2.0, 2.0]) 167 assert abs(b-0.0)<0.0001 and abs(m-1.0)<0.0001 168 m,b = solve_fit([0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0], [0.0, 1.0, 0.9, 2.0, 1.1, 2.0, 3.0, 4.0]) 169 assert abs(b-1.0)<0.0001 and abs(m-1.0)<0.0001 170 m,b = solve_fit([0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 6.0, 7.0], [0.0, 1.0, 0.9, 2.0, 1.1, 6.0, 7.0, 8.0]) 171 assert abs(b-1.0)<0.0001 and abs(m-1.0)<0.0001 172 m,b = solve_fit([0.0, 0.0, 0.0, 5.0, 6.0, 7.0], [1.0, 1.1, 0.9, 6.0, 7.0, 8.0]) 173 assert abs(b-1.0)<0.0001 and abs(m-1.0)<0.0001

174 175 if __name__ == '__main__': 176 test1() 177 test2() 178

Source Code for Module gmisclib.solve_sum_abs