1 import numpy
2
3
4 from gmisclib import gpk_lsq
5
6
10
11
12 _Float = numpy.dtype('float')
13
15 """This solves the set of linear equations a*x = y, with weights w.
16 Normally, a.shape==(m,n) and y.shape==(m,q),
17 and the returned x.shape==(n,q).
18 where m is the number of constraints provided by the data,
19 n is the number of parameters to use in a fit
20 (equivalently, the number of basis functions),
21 and q is the number of separate sets of equations
22 that you are fitting.
23 Then, x has shape (n,q) and the_fit has shape (m,q).
24 Interpreting this as a linear regression, there are n
25 parameters in the model, and m measurements.
26 Q is the number of times you apply the model to a
27 different data set; each on yields a different solution.
28
29 This uses gpk_lsq.linear_least_squares as the underlying algorithm.
30 @note: Y may be a 1-D matrix (a vector), in which case
31 the fit is a vector. This is the normal
32 case where you are fitting one equation.
33 If y is a 2-D matrix,
34 each column (second index) in y is a separate fit, and
35 each column in the solution is a separate result.
36 """
37 - def __init__(self, a, y, w, minsv=None, minsvr=None, copy=True):
38 a = numpy.asarray(a, _Float)
39 if a.ndim != 2:
40 raise ValueError, "a needs to be 2-dimensional: shape=%s" % str(a.shape)
41
42 self.w = numpy.array(w, _Float, copy=copy)
43 if self.w.ndim != 1 and self.w.shape[0] != a.shape[0]:
44 raise ValueError, "w needs to be 1-dimensional and match a: shape=%s vs %s" % (str(self.w.shape), str(a.shape))
45
46 aw = self.w[:,numpy.newaxis] * a
47 gpk_lsq.linear_least_squares.__init__(self, aw, None, minsv=minsv,
48 minsvr=minsvr, copy=False)
49 self.set_y(y, copy=copy)
50
51
52 - def fit(self, copy=False):
58
59 - def y(self, copy=True):
62
63 - def set_y(self, y, copy=True):
67
68
72
73
74
75
76
78 HTMIN = 3.0
79 STMIN = 1.5
80
81 - def __init__(self, a, y, sigma_y=None, minsv=None, minsvr=None, copy=True):
82 """This does bounded influence regression, with
83 a robust M-estimator in the y-direction.
84 """
85
86 y = numpy.array(y, _Float, copy=copy)
87 assert y.ndim == 1
88 ndat = y.shape[0]
89 if sigma_y is None:
90 sigma_y = 1.0
91 if isinstance(sigma_y, (float, int)):
92 self.sigma_y = sigma_y * numpy.ones(y.shape, _Float)
93 else:
94 self.sigma_y = numpy.asarray(sigma_y, _Float)
95 assert self.sigma_y.ndim == 1
96 assert self.sigma_y.shape[0] == ndat
97
98 a = numpy.asarray(a, _Float)
99 assert a.ndim == 2
100 assert a.shape[0] == ndat
101
102 if a.shape[1] >= ndat:
103 raise NotEnoughData, "Not enough data to bound influence function"
104
105 w = 0.0
106 w_hat = numpy.ones(y.shape, _Float)
107 w_resid = numpy.ones(y.shape, _Float)
108 hthresh = 6*self.HTMIN
109 sthresh = 6*self.STMIN
110 endgame = False
111 while True:
112 last_w = w
113 w = w_hat * w_resid
114 if numpy.alltrue( numpy.absolute(w-last_w) < 0.01 ):
115 if endgame:
116 break
117 else:
118 hthresh = self.HTMIN
119 sthresh = self.STMIN
120 endgame = True
121
122
123
124
125
126 self.w = w
127 wsig = w/self.sigma_y
128
129 w_linear_least_squares.__init__(self, a, y, wsig, minsv=minsv, minsvr=minsvr)
130
131 ht = (1 - 2.0/y.shape[0])/hthresh
132
133 if numpy.sometrue(self.hat() >= 1.0):
134 raise NotEnoughData, "Cannot bound influence of point(s)."
135 hatfac = (1-self.hat())/ht
136
137 w_hat = numpy.minimum(1.0, w_hat*hatfac**0.5)
138
139 w_hat = w_hat/w_hat[numpy.argmax(w_hat)]
140
141
142 normerr = (y - self.fit())/self.sigma_y
143
144
145 self.typdev = numpy.mean(numpy.absolute(normerr))
146 new_w_resid = 1.0/numpy.hypot(1, normerr/(self.typdev * sthresh))
147 w_resid = 0.5*(w_resid + new_w_resid)
148 wrs = numpy.sqrt( numpy.square(w_resid).sum(axis=0)/ndat )
149 numpy.divide(w_resid, wrs, w_resid)
150
151
152 hthresh = self.HTMIN + 0.5*(hthresh-self.HTMIN)
153 sthresh = self.STMIN + 0.5*(sthresh-self.STMIN)
154
155
156
157
158
160 basis = numpy.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9, 100], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
161 y = numpy.array([0.0, 1, 2, 3, 4, 5, 6, 7, 18.01, 99])
162 w = numpy.array([1, 3.2, 1, 1, 5, 1, 1, 1, .0001, 1])
163 soln = w_linear_least_squares(basis, y, w)
164 print 'soln.y', soln.y()
165 assert (numpy.absolute(soln.y()-y)).sum() < 0.0001
166 print 'soln.x', soln.x()
167 print 'soln.fit', soln.fit()
168 assert 80 < numpy.absolute(soln.fit()).sum() < 200
169 assert soln.x().ndim == 1
170 assert soln.fit().ndim == 1
171 assert soln.rank() == 2
172 print 'residual=', soln.residual()
173 assert (numpy.absolute(soln.residual()) > 0.2).sum() == 1
174 assert (numpy.absolute(soln.residual()) > 10.2).sum() == 0
175 assert 0.5 < numpy.absolute(soln.residual()).sum() < 30
176 err = ((soln.fit()-y)**2).sum()
177 assert err>=0
178 assert (err > 0.02).sum() == 1
179 assert soln.sv().shape == (2,)
180 assert abs(soln.x()[0] - 1) < 0.01
181 assert abs(soln.x()[1] + 1) < 0.01
182 assert numpy.absolute(w-soln.w).sum() < 1e-6
183 assert numpy.absolute(numpy.dot(basis, soln.x())-soln.fit()).sum() < 1e-6
184
185
186
188 basis = numpy.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9, 100], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
189 y = numpy.array([0.01, 1, 2.01, 3, 3.99, 5, 6.01, 7, 88.01, 99])
190 w = numpy.array([0.2, 1, 0.2, 5, 1, 0.2, 1, 2, .001, 0.01])
191 soln = w_linear_least_squares(basis, y, w)
192 print 'soln.x', soln.x()
193 print 'soln.resid', soln.residual()
194 assert (numpy.absolute(soln.residual()) > 0.05).sum() == 1
195 assert abs(soln.x()[0] - 1) < 0.01
196 assert abs(soln.x()[1] + 1) < 0.01
197 print 'sf', soln.fit()
198 print 'mm', numpy.dot(basis, soln.x())
199 assert numpy.absolute(numpy.dot(basis, soln.x()) -soln.fit()).sum() < 1e-6
200
202 basis = numpy.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9, 100], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
203 y = numpy.array([0.01, 1, 1.99, 3, 4.01, 5, 5.99, 7, 88.01, 99])
204 soln = robust_linear_fit(basis, y, 1)
205 print 'soln.x', soln.x()
206 assert soln.x().ndim == 1
207 assert soln.fit().ndim == 1
208 assert soln.rank() == 2
209 err = ((soln.fit()-y)**2).sum()
210 assert err>=0
211 assert (err > 0.02).sum() == 1
212 assert soln.sv().shape == (2,)
213 assert abs(soln.x()[0] - 1) < 0.06
214 assert abs(soln.x()[1] + 1) < 0.06
215 assert numpy.absolute(numpy.dot(basis, soln.x())
216 -soln.fit()).sum() < 1e-5
217
219 basis = numpy.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9, 100],
220 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
221 y = numpy.array([0.01, 1, 1.99, 3, 4.01, 5, 5.99, 7, 8, 90])
222 soln = robust_linear_fit(basis, y, 1)
223 print 'soln.x', soln.x()
224 assert soln.rank() == 2
225 print 'resid=', soln.residual()
226 assert (numpy.absolute(soln.residual()) > 0.4).sum() == 1
227 assert abs(soln.x()[0] - 1) < 0.07
228 assert abs(soln.x()[1] + 1) < 0.5
229 assert numpy.absolute(numpy.dot(basis, soln.x())
230 - soln.fit()).sum() < 1e-5
231
232
233 if __name__ == '__main__':
234 test_w1()
235 test_w1b()
236 test_r1()
237 test_r1hat()
238