gmisclib.gpk

1 import numpy 2 3 # import gpkavg 4 from gmisclib import gpk_lsq 5 6

7 -class NotEnoughData(ValueError):

8 - def __init__(self, s):

9 ValueError.__init__(self, s)

10 11 12 _Float = numpy.dtype('float') 13

14 -class w_linear_least_squares(gpk_lsq.linear_least_squares):

15 """This solves the set of linear equations a*x = y, with weights w. 16 Normally, a.shape==(m,n) and y.shape==(m,q), 17 and the returned x.shape==(n,q). 18 where m is the number of constraints provided by the data, 19 n is the number of parameters to use in a fit 20 (equivalently, the number of basis functions), 21 and q is the number of separate sets of equations 22 that you are fitting. 23 Then, x has shape (n,q) and the_fit has shape (m,q). 24 Interpreting this as a linear regression, there are n 25 parameters in the model, and m measurements. 26 Q is the number of times you apply the model to a 27 different data set; each on yields a different solution. 28 29 This uses gpk_lsq.linear_least_squares as the underlying algorithm. 30 @note: Y may be a 1-D matrix (a vector), in which case 31 the fit is a vector. This is the normal 32 case where you are fitting one equation. 33 If y is a 2-D matrix, 34 each column (second index) in y is a separate fit, and 35 each column in the solution is a separate result. 36 """

37 - def __init__(self, a, y, w, minsv=None, minsvr=None, copy=True):

38 a = numpy.asarray(a, _Float) 39 if a.ndim != 2: 40 raise ValueError, "a needs to be 2-dimensional: shape=%s" % str(a.shape) 41 42 self.w = numpy.array(w, _Float, copy=copy) 43 if self.w.ndim != 1 and self.w.shape[0] != a.shape[0]: 44 raise ValueError, "w needs to be 1-dimensional and match a: shape=%s vs %s" % (str(self.w.shape), str(a.shape)) 45 46 aw = self.w[:,numpy.newaxis] * a 47 gpk_lsq.linear_least_squares.__init__(self, aw, None, minsv=minsv, 48 minsvr=minsvr, copy=False) 49 self.set_y(y, copy=copy)

50 51

52 - def fit(self, copy=False):

53 """ 54 @note: all elements of C{w} must be nonzero. 55 """ 56 f = gpk_lsq.linear_least_squares.fit(self, copy=False)/self.w 57 return f

58

59 - def y(self, copy=True):

60 y = gpk_lsq.linear_least_squares.y(self, copy=False)/self.w 61 return y

62

63 - def set_y(self, y, copy=True):

64 if y is not None: 65 y = self.w * y 66 gpk_lsq.linear_least_squares.set_y(self, y, copy=False)

67 68

69 - def residual(self):

70 r = self.y(copy=False) - self.fit(copy=False) 71 return r

72 73 74 75 76 # class robust_linear_fit(lls_base):

77 -class robust_linear_fit(w_linear_least_squares):

78 HTMIN = 3.0 79 STMIN = 1.5 80

81 - def __init__(self, a, y, sigma_y=None, minsv=None, minsvr=None, copy=True):

82 """This does bounded influence regression, with 83 a robust M-estimator in the y-direction. 84 """ 85 86 y = numpy.array(y, _Float, copy=copy) 87 assert y.ndim == 1 88 ndat = y.shape[0] 89 if sigma_y is None: 90 sigma_y = 1.0 91 if isinstance(sigma_y, (float, int)): 92 self.sigma_y = sigma_y * numpy.ones(y.shape, _Float) 93 else: 94 self.sigma_y = numpy.asarray(sigma_y, _Float) 95 assert self.sigma_y.ndim == 1 96 assert self.sigma_y.shape[0] == ndat 97 98 a = numpy.asarray(a, _Float) 99 assert a.ndim == 2 100 assert a.shape[0] == ndat 101 102 if a.shape[1] >= ndat: 103 raise NotEnoughData, "Not enough data to bound influence function" 104 105 w = 0.0 106 w_hat = numpy.ones(y.shape, _Float) 107 w_resid = numpy.ones(y.shape, _Float) 108 hthresh = 6*self.HTMIN 109 sthresh = 6*self.STMIN 110 endgame = False 111 while True: 112 last_w = w 113 w = w_hat * w_resid 114 if numpy.alltrue( numpy.absolute(w-last_w) < 0.01 ): 115 if endgame: 116 break 117 else: 118 hthresh = self.HTMIN 119 sthresh = self.STMIN 120 endgame = True 121 # print 'ENDGAME' 122 123 # print 'w=', w 124 # print 'y=', y 125 126 self.w = w 127 wsig = w/self.sigma_y 128 129 w_linear_least_squares.__init__(self, a, y, wsig, minsv=minsv, minsvr=minsvr) 130 131 ht = (1 - 2.0/y.shape[0])/hthresh 132 # print 'hat=', self.hat() 133 if numpy.sometrue(self.hat() >= 1.0): 134 raise NotEnoughData, "Cannot bound influence of point(s)." 135 hatfac = (1-self.hat())/ht 136 # print 'hatfac=', hatfac 137 w_hat = numpy.minimum(1.0, w_hat*hatfac**0.5) 138 # Keep largest weight at unity: 139 w_hat = w_hat/w_hat[numpy.argmax(w_hat)] 140 # print 'w_hat=', w_hat 141 142 normerr = (y - self.fit())/self.sigma_y 143 # print 'normerr=', normerr 144 # self.typdev = gpkavg.avg(numpy.absolute(normerr), None, 0.25)[0] 145 self.typdev = numpy.mean(numpy.absolute(normerr)) 146 new_w_resid = 1.0/numpy.hypot(1, normerr/(self.typdev * sthresh)) 147 w_resid = 0.5*(w_resid + new_w_resid) 148 wrs = numpy.sqrt( numpy.square(w_resid).sum(axis=0)/ndat ) 149 numpy.divide(w_resid, wrs, w_resid) 150 # print 'w_resid=', w_resid 151 152 hthresh = self.HTMIN + 0.5*(hthresh-self.HTMIN) 153 sthresh = self.STMIN + 0.5*(sthresh-self.STMIN)

154 # print 'ht, st=', hthresh, sthresh 155 # print "END" 156 157 158

159 -def test_w1():

160 basis = numpy.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9, 100], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) 161 y = numpy.array([0.0, 1, 2, 3, 4, 5, 6, 7, 18.01, 99]) 162 w = numpy.array([1, 3.2, 1, 1, 5, 1, 1, 1, .0001, 1]) 163 soln = w_linear_least_squares(basis, y, w) 164 print 'soln.y', soln.y() 165 assert (numpy.absolute(soln.y()-y)).sum() < 0.0001 166 print 'soln.x', soln.x() 167 print 'soln.fit', soln.fit() 168 assert 80 < numpy.absolute(soln.fit()).sum() < 200 169 assert soln.x().ndim == 1 170 assert soln.fit().ndim == 1 171 assert soln.rank() == 2 172 print 'residual=', soln.residual() 173 assert (numpy.absolute(soln.residual()) > 0.2).sum() == 1 174 assert (numpy.absolute(soln.residual()) > 10.2).sum() == 0 175 assert 0.5 < numpy.absolute(soln.residual()).sum() < 30 176 err = ((soln.fit()-y)**2).sum() 177 assert err>=0 178 assert (err > 0.02).sum() == 1 179 assert soln.sv().shape == (2,) 180 assert abs(soln.x()[0] - 1) < 0.01 181 assert abs(soln.x()[1] + 1) < 0.01 182 assert numpy.absolute(w-soln.w).sum() < 1e-6 183 assert numpy.absolute(numpy.dot(basis, soln.x())-soln.fit()).sum() < 1e-6

184 185 186

187 -def test_w1b():

188 basis = numpy.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9, 100], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) 189 y = numpy.array([0.01, 1, 2.01, 3, 3.99, 5, 6.01, 7, 88.01, 99]) 190 w = numpy.array([0.2, 1, 0.2, 5, 1, 0.2, 1, 2, .001, 0.01]) 191 soln = w_linear_least_squares(basis, y, w) 192 print 'soln.x', soln.x() 193 print 'soln.resid', soln.residual() 194 assert (numpy.absolute(soln.residual()) > 0.05).sum() == 1 195 assert abs(soln.x()[0] - 1) < 0.01 196 assert abs(soln.x()[1] + 1) < 0.01 197 print 'sf', soln.fit() 198 print 'mm', numpy.dot(basis, soln.x()) 199 assert numpy.absolute(numpy.dot(basis, soln.x()) -soln.fit()).sum() < 1e-6

200

201 -def test_r1():

202 basis = numpy.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9, 100], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) 203 y = numpy.array([0.01, 1, 1.99, 3, 4.01, 5, 5.99, 7, 88.01, 99]) 204 soln = robust_linear_fit(basis, y, 1) 205 print 'soln.x', soln.x() 206 assert soln.x().ndim == 1 207 assert soln.fit().ndim == 1 208 assert soln.rank() == 2 209 err = ((soln.fit()-y)**2).sum() 210 assert err>=0 211 assert (err > 0.02).sum() == 1 212 assert soln.sv().shape == (2,) 213 assert abs(soln.x()[0] - 1) < 0.06 214 assert abs(soln.x()[1] + 1) < 0.06 215 assert numpy.absolute(numpy.dot(basis, soln.x()) 216 -soln.fit()).sum() < 1e-5

217

218 -def test_r1hat():

219 basis = numpy.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9, 100], 220 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) 221 y = numpy.array([0.01, 1, 1.99, 3, 4.01, 5, 5.99, 7, 8, 90]) 222 soln = robust_linear_fit(basis, y, 1) 223 print 'soln.x', soln.x() 224 assert soln.rank() == 2 225 print 'resid=', soln.residual() 226 assert (numpy.absolute(soln.residual()) > 0.4).sum() == 1 227 assert abs(soln.x()[0] - 1) < 0.07 228 assert abs(soln.x()[1] + 1) < 0.5 229 assert numpy.absolute(numpy.dot(basis, soln.x()) 230 - soln.fit()).sum() < 1e-5

231 232 233 if __name__ == '__main__': 234 test_w1() 235 test_w1b() 236 test_r1() 237 test_r1hat() 238

Source Code for Module gmisclib.gpk_rlsq