# Source Code for Module classifiers.q_classiboost

```  1  #!/usr/bin/env python
2
3  """Reads the *.fiat file produced by q_classifier.py that
4  contains the decisions of the individual classifiers in the forest.
5  Returns the majority decision."""
6
7  import math
8  from gmisclib import fiatio
9  from gmisclib import nice_hash
10  from gmisclib import avio
11  from gmisclib import dictops
12
13  DEFAULTS = {}  # Starting contents of every per-utterance record built by vote().
14
def index(individuals):
    """Takes a list of decisions on an individual utterance
    from an individual classifier (as from *.fiat from
    q_classifier.py) and indexes them by the utterance.

    Each decision must be a mapping with a 'uid' key.

    Returns a dictionary that maps each uid to the list of decisions
    carrying that uid, in the order in which they appear in the input.
    An empty input gives an empty dictionary.
    """
    uid_index = {}
    for decision in individuals:
        # Group the decisions: one list per utterance id.  A plain dict
        # is returned, so looking up an unknown uid still raises KeyError.
        uid_index.setdefault(decision['uid'], []).append(decision)
    return uid_index
25
26
27  _h = nice_hash.nice_hash( lambda x: x )  # NOTE(review): _h is not referenced in the visible code -- confirm it is still needed.
28
def vote(individuals):
    """Combine the decisions of the individual classifiers into one
    majority decision per utterance.

    individuals is a list of decision records, each with (at least) the
    keys 'uid', 'compclass' and 'trueclass'.  The records are grouped by
    'uid' with index(), and for every utterance the class that collected
    the most votes wins.  On a tie, the class met first while scanning
    the scores is kept.

    Returns a list with one dictionary per utterance.  Each dictionary
    starts as a copy of DEFAULTS and then receives:
      'V_<class>'   -- fraction of this utterance's votes given to
                       <class>, for every class seen anywhere in the input,
      'compclass'   -- the winning class,
      'trueclass'   -- the true class of the utterance,
      'vote'        -- number of votes for the winning class,
      'totalvote'   -- number of votes cast for this utterance,
      'uid'         -- the utterance id,
      'correctvote' -- number of votes for the true class.

    Raises AssertionError if the records of one utterance disagree about
    its true class.
    """
    # Collect data in groups of utterance id.
    idx = index(individuals)

    # First, we see what the set of classes is.  The zero values make
    # this dictionary the initial score table for every utterance.
    domain = {}
    for decisions in idx.values():
        for decision in decisions:
            domain[decision['compclass']] = 0
            domain[decision['trueclass']] = 0

    o = []
    # items() rather than iteritems(): it works on Python 2 and 3 alike.
    for (uid, decisions) in idx.items():
        # scores counts how many first-level classifiers computed each
        # class for this utterance.
        scores = domain.copy()
        trueclass = None
        for decision in decisions:
            scores[decision['compclass']] += 1
            if trueclass is None:
                trueclass = decision['trueclass']
            else:
                assert trueclass == decision['trueclass']
        assert trueclass is not None

        # Now, we compute the class that a majority of the first-level
        # classifiers vote for.  majorityclass is reset for every
        # utterance so that a winner can never leak in from the
        # previous one.
        total = 0
        bestscore = 0
        majorityclass = None
        for (k, sc) in scores.items():
            total += sc
            if sc > bestscore:
                bestscore = sc
                majorityclass = k
        assert majorityclass is not None
        correctvote = scores[trueclass]

        tmp = DEFAULTS.copy()
        for (k, sc) in scores.items():
            tmp['V_%s' % k] = float(sc)/float(total)
        tmp['compclass'] = majorityclass
        tmp['trueclass'] = trueclass
        tmp['vote'] = bestscore
        tmp['totalvote'] = total
        tmp['uid'] = uid
        tmp['correctvote'] = correctvote
        o.append(tmp)
    return o
89
90
91  if __name__ == '__main__':
92          import sys
93          arglist = sys.argv[1:]
94          outputfile = None
95          DEFAULTS = {}
96          while arglist and arglist[0].startswith('-'):
97                  arg = arglist.pop(0)
98                  if arg == '--':
99                          break
100                  elif arg == 'default':
101                          DEFAULTS[arglist[0]] = arglist[1]
102                          arglist = arglist[2:]
103                  elif arg == '-o':
104                          outputfile = arglist.pop(0)
105                  else:
106                          die.die('Unrecognized argument: %s' % arg)
107          fname = arglist.pop(0)
109          correct = 0
110          total = 0
111          indicorrect = 0
112          inditotal = 0
114          chance = {}
116                  if decision['compclass'] == decision['trueclass']:
117                          correct += 1
118                  total += 1
119                  indicorrect += decision['correctvote']
120                  inditotal += decision['totalvote']
121                  if decision['trueclass'] not in chance:
122                          chance[decision['trueclass']] = 1
123                  else:
124                          chance[decision['trueclass']] += 1
125
126          Pcorrect = float(correct)/float(total)
127          Picorrect = float(indicorrect)/float(inditotal)
128          Pchance = 0.0
129          for c in chance.values():
130                  Pchance += (float(c)/float(total))**2
131          K = (Pcorrect-Pchance)/(1.0-Pchance)
132          Ki = (Picorrect-Pchance)/(1.0-Pchance)
133          out = {'K': K, 'Kindividual': Ki, 'Pcorrect': Pcorrect,
134                  'Individual': Picorrect, 'chance': Pchance,
135                  'total': total}
136
137          print avio.concoct(out)
138          if outputfile is not None:
139                  fd = open(outputfile, 'w')