1
2
3 """Reads the *.fiat file produced by q_classifier.py that
4 contains the decisions of the individual classifiers in the forest.
5 Returns the majority decision."""
6
7 import math
8 from gmisclib import fiatio
9 from gmisclib import nice_hash
10 from gmisclib import avio
11 from gmisclib import dictops
12
# Extra key/value pairs that vote() copies into every output record before
# it fills in the computed fields (so computed fields win on a key clash).
# The command-line driver at the bottom of this file rebinds and fills it.
DEFAULTS = {}
14
def index(individuals):
    """Group individual classifier decisions by utterance.

    @param individuals: iterable of decision records (as read from the
        *.fiat file written by q_classifier.py).  Each record must be
        indexable by the key 'uid'.
    @return: a dictionary mapping each uid to the collection of decision
        records that carry that uid, as built up by dictops.add_dol().
    @raise KeyError: if a decision record has no 'uid' entry.
    """
    uid_index = {}
    for decision in individuals:
        uid = decision['uid']
        # add_dol files the decision under its uid.
        # NOTE(review): presumably "dol" is a dict-of-lists, i.e. the value
        # is appended to a per-uid list -- confirm against gmisclib.dictops.
        dictops.add_dol(uid_index, uid, decision)
    return uid_index
25
26
# Module-wide hash object shared by every call to vote(): vote() passes
# class labels to _h.add() and stores the results in the 'cclassid' and
# 'tclassid' output fields.
# NOTE(review): presumably the identity lambda means labels are keyed by
# their own value -- confirm against gmisclib.nice_hash.
_h = nice_hash.nice_hash( lambda x: x )
28
def vote(individuals):
    """Combine per-classifier decisions into one majority vote per utterance.

    @param individuals: iterable of decision records; each must supply the
        keys 'uid', 'compclass' (the class a classifier chose) and
        'trueclass' (the correct class).
    @return: a list with one dictionary per distinct uid.  Each dictionary
        starts as a copy of the module-level DEFAULTS and then gets:
          - 'V_<class>': fraction of this utterance's votes that went to
            <class>, for every class seen anywhere in the input;
          - 'compclass': the class with the most votes;
          - 'trueclass': the utterance's true class;
          - 'cclassid', 'tclassid': _h.add() of the two classes above;
          - 'vote': number of votes the winning class received;
          - 'totalvote': number of votes cast for this utterance;
          - 'correctvote': number of votes cast for the true class;
          - 'uid': the utterance id.
    @raise AssertionError: if the decisions for one uid disagree about
        'trueclass', or no true/majority class could be determined.
    """
    idx = index(individuals)

    # Every class that appears anywhere, as a computed or a true class, gets
    # a zeroed slot so that all output records share the same V_* columns.
    domain = {}
    for decisions in idx.values():
        for decision in decisions:
            domain[decision['compclass']] = 0
            domain[decision['trueclass']] = 0

    results = []
    for (uid, decisions) in idx.items():
        # Tally this utterance's votes and check that all classifiers were
        # told the same true class.
        scores = domain.copy()
        trueclass = None
        for decision in decisions:
            scores[decision['compclass']] += 1
            if trueclass is None:
                trueclass = decision['trueclass']
            else:
                assert trueclass == decision['trueclass']
        assert trueclass is not None

        # Find the winner.  majorityclass is reset for every utterance so
        # the assertion below cannot be satisfied by a value left over from
        # the previous one.  Ties go to the class met first in iteration
        # order, because only a strictly larger score replaces the leader.
        total = 0
        bestscore = 0
        majorityclass = None
        for (k, sc) in scores.items():
            total += sc
            if sc > bestscore:
                bestscore = sc
                majorityclass = k
        assert majorityclass is not None
        correctvote = scores[trueclass]

        tmp = DEFAULTS.copy()
        for (k, sc) in scores.items():
            tmp['V_%s' % k] = float(sc)/float(total)
        tmp['compclass'] = majorityclass
        tmp['trueclass'] = trueclass
        tmp['cclassid'] = _h.add(majorityclass)
        tmp['tclassid'] = _h.add(trueclass)
        tmp['vote'] = bestscore
        tmp['totalvote'] = total
        tmp['uid'] = uid
        tmp['correctvote'] = correctvote
        results.append(tmp)
    return results
89
90
if __name__ == '__main__':
    # Command-line driver.
    # Usage: [-default KEY VALUE]... [-o OUTPUTFILE] [--] INPUT.fiat
    # Prints summary statistics for the majority vote and, with -o, writes
    # the per-utterance vote records to OUTPUTFILE.
    import sys
    # die was used below without ever being imported at file level.
    # NOTE(review): die.die() is assumed to print its message and terminate
    # the process -- confirm against gmisclib.die.
    from gmisclib import die

    arglist = sys.argv[1:]
    outputfile = None
    DEFAULTS = {}
    while arglist and arglist[0].startswith('-'):
        arg = arglist.pop(0)
        if arg == '--':
            break
        elif arg == '-default':
            # The option name needs its leading '-': the loop condition only
            # lets through arguments that start with '-'.
            if len(arglist) < 2:
                die.die('-default requires a key and a value')
            DEFAULTS[arglist[0]] = arglist[1]
            arglist = arglist[2:]
        elif arg == '-o':
            if not arglist:
                die.die('-o requires a file name')
            outputfile = arglist.pop(0)
        else:
            die.die('Unrecognized argument: %s' % arg)
    if not arglist:
        die.die('No input file specified')
    fname = arglist.pop(0)
    with open(fname, 'r') as infile:
        h, d, comments = fiatio.read(infile)

    correct = 0       # utterances whose majority class is the true class
    total = 0         # utterances voted on
    indicorrect = 0   # individual votes cast for the true class
    inditotal = 0     # individual votes cast in all
    votes = vote(d)
    chance = {}       # true class -> number of utterances with that class
    for decision in votes:
        if decision['compclass'] == decision['trueclass']:
            correct += 1
        total += 1
        indicorrect += decision['correctvote']
        inditotal += decision['totalvote']
        chance[decision['trueclass']] = chance.get(decision['trueclass'], 0) + 1

    if total == 0:
        # Without this guard the ratios below raise ZeroDivisionError.
        die.die('No decisions found in %s' % fname)

    Pcorrect = float(correct)/float(total)
    Picorrect = float(indicorrect)/float(inditotal)
    # Chance agreement: sum of squared true-class frequencies.
    Pchance = 0.0
    for c in chance.values():
        Pchance += (float(c)/float(total))**2
    if Pchance >= 1.0:
        # Only one true class is present, so (1 - Pchance) is zero and the
        # chance-corrected scores below cannot be computed.
        die.die('Only one true class in %s; K is undefined' % fname)
    # Chance-corrected agreement for the majority vote (K) and for the
    # individual classifiers (Ki).
    K = (Pcorrect-Pchance)/(1.0-Pchance)
    Ki = (Picorrect-Pchance)/(1.0-Pchance)
    out = {'K': K, 'Kindividual': Ki, 'Pcorrect': Pcorrect,
           'Individual': Picorrect, 'chance': Pchance,
           'total': total}

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(avio.concoct(out))
    if outputfile is not None:
        with open(outputfile, 'w') as fd:
            fiatio.write(fd, votes, hdr=h,
                         comments=comments + ['Processed by q_classiboost'])
142