1
2
3 """This computes centers for classes.
4 You give it a classified.fiat file on the argument list,
5 as obtained from *_classifier.py, and it
6 produces a list of files, class names, and the feature vector
7 at the center of each class.
8
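To define the center, it takes a component-by-component
average of the feature vectors (computed by gpkavg.avg with
the CLIP setting), including only
data that are correctly classified.
The number of components is the median length (rounded up)
of the correctly classified feature vectors.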
12 """
13
14
15 import math
16 import gpkavg
17 from gmisclib import gpkmisc
18 from gmisclib import Num
19 from gmisclib import die
20 from gmisclib import dictops
21 import q_classifier_r
22
# Passed as the third argument to gpkavg.avg() when the per-component
# averages are computed.  The '-clip' command-line flag overrides it.
# NOTE(review): presumably the clipping fraction for the average --
# confirm against the gpkavg documentation.
CLIP = 0.15
24
25
def process(f):
    """Compute the center of every class found in one classified file.

    f is the name of the file to read; it is opened for reading and
    parsed with q_classifier_r.read_fiat().  Only data whose
    'trueclass' equals its 'compclass' (i.e. correctly classified
    data) contribute to the result.

    The length n of the result vectors is the median length of the
    contributing feature vectors, rounded up.  Longer vectors are
    truncated to n components; shorter ones are padded with zeros.

    Returns a dictionary mapping each class name to a pair
    (acoef, vcoef) of length-n arrays that hold, component by
    component, the two values returned by gpkavg.avg() (named
    average and variance here) for that class.
    """
    fd = open(f, 'r')
    try:
        _h, data, _c = q_classifier_r.read_fiat(fd)
    finally:
        # Release the file handle even when parsing fails.
        fd.close()

    lp = []     # lengths of the correctly classified feature vectors
    nic = {}    # class name -> count, used below as a column count
    for datum in data:
        if datum['trueclass'] == datum['compclass']:
            lp.append(datum['V'].shape[0])
            # Count only correctly classified data, so that each class's
            # matrix has exactly one column per datum stored below.
            dictops.add_doc(nic, datum['trueclass'], 1)
    n = int(math.ceil(gpkmisc.median(lp)))

    index = {}  # class name -> next free column in psa[class]
    psa = {}    # class name -> (n, count) matrix, one column per datum
    for (cl, nicl) in nic.items():
        psa[cl] = Num.zeros((n, nicl), Num.Float)
        index[cl] = 0

    for datum in data:
        if datum['trueclass'] != datum['compclass']:
            continue
        v = datum['V']
        # A vector may be shorter than n: copy only the components that
        # exist and leave the rest of the column at zero.  Assigning
        # v[:n] to a full length-n slice would be a shape mismatch.
        ml = min(v.shape[0], n)
        tc = datum['trueclass']
        psa[tc][:ml, index[tc]] = v[:ml]
        index[tc] += 1

    o = {}
    for cl in nic:
        acoef = Num.zeros((n,), Num.Float)
        vcoef = Num.zeros((n,), Num.Float)
        for j in range(n):
            # Row j holds component j of every datum in this class.
            thisAvg, thisVar = gpkavg.avg(psa[cl][j, :], None, CLIP)
            acoef[j] = thisAvg
            vcoef[j] = thisVar
        o[cl] = (acoef, vcoef)
    return o
60
61
64
65
66 if __name__ == '__main__':
67 import sys
68 arglist = sys.argv[1:]
69 Var = False
70 while arglist and arglist[0].startswith('-'):
71 arg = arglist.pop(0)
72 if arg == '--':
73 break
74 elif arg == '-var':
75 Var = True
76 elif arg == '-clip':
77 CLIP = float(arglist.pop(0))
78 else:
79 die.die('Unrecognized flag: %s' % arg)
80
81 for f in arglist:
82 typ = process(f)
83 for (cl, (acoef, vcoef)) in typ.items():
84 if Var:
85 print f, cl, format(vcoef)
86 else:
87 print f, cl, format(acoef)
88