1
2
3
4 """A classifier that assumes that log(P) is a quadratic form.
5 """
6
7
8 import math
9 from gmisclib import Num
10 from gmisclib import die
11 from gmisclib import fiatio
12 from gmisclib import g_closure
13 from gmisclib import gpkmisc
14 from gmisclib import chunkio
15 from g_classifiers import q_classifier_r as Q
16
17 COVERAGE = 3
18 FTEST = 0.25
19 N_PER_DIM = 10
20
21
def __init__(self, data=None, evaluator=None, models=None, ftrim=None):
	"""Describe a quadratic-discriminant classifier.

	Exactly one of data/models should be supplied: the class list and
	feature-vector dimensionality are taken either from a training set
	or from an existing dictionary of per-class models.

	@param data: training data (sequence of Q.datum_tr), or None.
	@param evaluator: passed through to Q.classifier_desc.
	@param models: dict mapping classid -> quadratic model, or None.
	@param ftrim: trimming control, passed through to Q.classifier_desc.
	@raise ValueError: if neither data nor models is given.
	"""
	if data is not None:
		classes = Q.list_classes(data)
		fvdim = Q.get_dim(data)
	elif models is not None:
		classes = models.keys()
		# Dimensionality is read off any one model's mean vector.
		fvdim = models.values()[0]._mu.shape[0]
	else:
		# Bug fix: the message named "l_classifier_desc" (copy-paste from
		# the linear classifier); this is the quadratic-discriminant one.
		raise ValueError("Needs either a set of models or data to build a qd_classifier_desc.")
	Q.classifier_desc.__init__(self, classes, fvdim, evaluator=evaluator, ftrim=ftrim)
35
37 return self.ndim + 1 + (self.ndim*(self.ndim+1))/2
38
40 """The total number of parameters required to define all classes."""
41 assert self.nc > 1, "Only %d classes" % self.nc
42 assert self.np() > 0
43 return (self.nc-1) * self.np()
44
46 """Unpack a square, symmetric matrix."""
47 nd = self.ndim
48 assert len(prmvec) >= (nd*(nd+1))/2
49 invsigma = Num.zeros((self.ndim, self.ndim), Num.Float)
50 for k in range(nd):
51 tmp = prmvec[:nd-k]
52 prmvec = prmvec[nd-k:]
53 invsigma[k, k:] = tmp
54 invsigma[k:, k] = tmp
55 return (prmvec, invsigma)
56
57
def unpack(self, prmvec, trainingset_name=None, uid=None):
	"""Produce a classifier from a parameter vector.

	The vector packs, for each class EXCEPT the last,
	[offset, mu (ndim values), upper triangle of invsigma].
	The last class is pinned to the all-zero quadratic model so the
	parameterization is not redundant (only nc-1 classes carry free
	parameters; see npt()).

	Bug fix: the loop used to 'break' at i >= nc-1, so the last class
	never received a model at all (KeyError on class_models downstream),
	and every other class had add_model called twice -- qzmodel
	clobbering the freshly-built qmodel.  The self-consistency checks in
	start() (last class must have zero bias/mu/invsigma, earlier classes
	must round-trip mu and invsigma) pin down the intended behavior.

	@param prmvec: flat parameter vector of length self.npt().
	@return: a qd_classifier with one model per class.
	"""
	m = self.np()
	nd = self.ndim
	assert len(prmvec) == self.npt()

	o = qd_classifier(self, trainingset_name=trainingset_name, uid=uid)
	for (i, c) in enumerate(self.c):
		if i < self.nc-1:
			pc = prmvec[i*m: (i+1)*m]
			offset = pc[0]
			pc = pc[1:]
			mu = pc[:nd]
			npc, invsigma = self._unpm(pc[nd:])
			assert len(npc) == 0	# whole per-class slice consumed
			o.add_model( c, Q.qmodel( mu, invsigma, offset))
		else:
			# Last class: fixed zero model (reference class).
			o.add_model( c, Q.qzmodel(self.ndim))

	return o
80
81
83 """Starting position for Markov Chain Monte Carlo."""
84 m = self.np()
85 data_in_class = {}
86 all_data_vecs = []
87 for c in self.c:
88 data_in_class[c] = []
89 for datum in data:
90 data_in_class[datum.classid].append( datum.value )
91 all_data_vecs.append( datum.value )
92 amu = gpkmisc.median_across( all_data_vecs )
93 mu = {}
94 for c in self.c:
95 assert len(data_in_class[c]) > 0
96 mu[c] = gpkmisc.median_across(data_in_class[c])
97
98
99 if len(data) < 2:
100 cov = Num.identity(amu.shape[0], Num.Float)
101 else:
102 cov = Num.zeros((amu.shape[0], amu.shape[0]), Num.Float)
103 for datum in data:
104
105 tmp = datum.value - amu
106
107 cov += Num.outerproduct(tmp, tmp)
108 cov /= len(data)
109
110 tr = Num.sum(Num.diagonal(cov))
111 trf = 0.01*tr/cov.shape[0]
112
113 for i in range(cov.shape[0]):
114 cov[i,i] += trf
115
116 invsigma = Num.LA.inv(cov)
117
118
119 npt = self.npt()
120 prmvec = Num.zeros((npt,), Num.Float)
121 V = Num.zeros((npt, npt), Num.Float)
122 j = 0
123 assert len(self.c) == self.nc
124 for (i, c) in enumerate(self.c):
125 if i >= self.nc-1:
126 break
127 pc = prmvec[i*m: (i+1)*m]
128 Vc = V[i*m:(i+1)*m, i*m:(i+1)*m]
129 pc[0] = 0.0
130 Vc[0,0] = 1.0
131 j = self.ndim + 1
132 pc[1:j] = mu[c]
133 Vc[1:j, 1:j] = cov
134 for k in range(self.ndim):
135
136
137 w = self.ndim-k
138
139 pc[j:j+w] = invsigma[k, k:k+w]
140
141 j += w
142 for jj in range(self.ndim+1, j):
143 Vc[jj, jj] = pc[jj]**2
144
145 assert j == self.np()
146
147 check = self.unpack(prmvec)
148
149 for (i, c) in enumerate(self.c):
150 if i < self.nc-1:
151
152
153 assert check.class_models[c].bias == 0.0
154 assert Num.sum(Num.absolute(check.class_models[c].mu-mu[c])) < 0.001
155 assert Num.sum(Num.ravel(Num.absolute(check.class_models[c].invsigma-invsigma))) < 0.001
156 else:
157 assert check.class_models[c].bias == 0.0
158 assert Num.sum(Num.absolute(check.class_models[c].mu)) < 0.001
159 assert Num.sum(Num.ravel(Num.absolute(check.class_models[c].invsigma))) < 0.001
160
161
162 return (prmvec, V)
163
165 return 'quadratic_discriminant_classifier'
166
167
168
169
170
171
173 - def __init__(self, cdesc=None, models=None, trainingset_name=None, uid=None):
181
182
183
184
185
186
187
191 d = Q.read_data(fd)
192 print '# classes: %s' % (' '.join(Q.list_classes(d)))
193 modelchoice = g_closure.Closure(qd_classifier_desc, g_closure.NotYet,
194 evaluator=Q.evaluator_from_name(evnm),
195 ftrim=ftrim)
196 classout = fiatio.writer(open('classified.fiat', 'w'))
197 classout.header('evaluator', evnm)
198 classout.header('ftrim', ftrim)
199 summary, out, wrong = Q.compute_self_class(d, coverage=coverage, ftest=ftest,
200 n_per_dim = n_per_dim,
201 modelchoice = modelchoice,
202 builder=Q.forest_build,
203 classout=classout, verbose=verbose,
204 modify_class=modify_class)
205 Q.default_writer(summary, out, classout, wrong)
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
240 print 'TEST_BUILD3:'
241 data = [
242 Q.datum_tr([1.0], 'A'), Q.datum_tr([-0.01], 'B'), Q.datum_tr([2.01], 'C'),
243 Q.datum_tr([1.02], 'A'), Q.datum_tr([-0.02], 'B'), Q.datum_tr([2.02], 'C'),
244 Q.datum_tr([1.01], 'A'), Q.datum_tr([0.0], 'B'), Q.datum_tr([2.0], 'C'),
245 Q.datum_tr([0.98], 'A'), Q.datum_tr([0.02], 'B'), Q.datum_tr([1.99], 'C'),
246 Q.datum_tr([0.99], 'A'), Q.datum_tr([0.01], 'B'), Q.datum_tr([1.98], 'C'),
247 ]
248 assert issubclass(classifier_choice, Q.classifier_desc)
249 modelchoice = classifier_choice(data, evaluator=None)
250 assert isinstance(modelchoice, Q.classifier_desc)
251 ok = False
252 for cdef in Q.forest_build(data, 40, modelchoice=modelchoice):
253 e = Q.evaluate_match(cdef, data)
254 if e < 1:
255 ok = 1
256 break
257 if not ok:
258 die.die( "can't find a perfect classifier in this trivial situation!")
259
260
261
263 import random
264 print 'TEST_BUILD1q:'
265 data = []
266 for i in range(9):
267 data.append( Q.datum_tr([random.random()], 'A'))
268 data.append( Q.datum_tr([random.random()+2.0], 'B'))
269 data.append( Q.datum_tr([random.random()+4.0], 'A'))
270 for datum in data:
271 print 'd=', datum
272 assert issubclass(classifier_choice, Q.classifier_desc)
273 modelchoice = classifier_choice(data, evaluator=None, ftrim=None)
274 assert isinstance(modelchoice, Q.classifier_desc)
275 ok = False
276 for cdef in Q.forest_build(data, 40, modelchoice=modelchoice):
277 e = Q.evaluate_match(cdef, data)
278 if e < 1:
279 ok = 1
280 break
281 if not ok:
282 die.die( "can't find a perfect classifier in this trivial situation!")
283
284
285
287 import random
288 print 'TEST_BUILD1tq:'
289 data = [ Q.datum_tr([-10.0], 'B') ]
290 for i in range(30):
291 data.append( Q.datum_tr([random.random()], 'A'))
292 data.append( Q.datum_tr([random.random()+2.0], 'B'))
293 data.append( Q.datum_tr([random.random()+4.0], 'A'))
294 assert issubclass(classifier_choice, Q.classifier_desc)
295 modelchoice = classifier_choice(data, evaluator=None, ftrim=(0.1, 6))
296 assert isinstance(modelchoice, Q.classifier_desc)
297 ok = False
298 for cdef in Q.forest_build(data, 40, modelchoice=modelchoice):
299 e = Q.evaluate_match(cdef, data)
300 if e < 2:
301 ok = 1
302 break
303 else:
304 print 'eval=', e
305 print 'cdef=', cdef
306 if not ok:
307 die.die( "can't find a perfect classifier in this trivial situation!")
308
309
310
311
330
331
# Run the module's self-tests when executed as a script.
if __name__ == '__main__':
	test()
334