classifiers.q_classiboost

Source Code for Module classifiers.q_classiboost

#!/usr/bin/env python

"""Majority voting over the output of q_classifier.py.

Reads the *.fiat file produced by q_classifier.py, which contains the
decisions of the individual classifiers in the forest, and returns the
majority decision.
"""

import math

from gmisclib import fiatio
from gmisclib import nice_hash
from gmisclib import avio
from gmisclib import dictops

# Key/value pairs that vote() copies into every record it produces
# (computed keys are assigned afterwards, so they win on a name clash).
DEFAULTS = {}

def index(individuals):
    """Group per-classifier decisions by the utterance they refer to.

    ``individuals`` is an iterable of decision records (mappings that have
    a ``'uid'`` entry, as read from the *.fiat file written by
    q_classifier.py).  Every record is filed under its uid with
    ``dictops.add_dol``, and the resulting dictionary, keyed by uid, is
    returned.
    """
    by_uid = {}
    for record in individuals:
        dictops.add_dol(by_uid, record['uid'], record)
    return by_uid

# Module-wide hash object built with an identity key function.  vote() feeds
# class labels to its add() method and stores what comes back as the
# 'cclassid' / 'tclassid' fields of each output record.
_h = nice_hash.nice_hash(lambda label: label)

def vote(individuals):
    """Combine first-level classifier decisions into one vote per utterance.

    ``individuals`` is an iterable of decision records, each a mapping
    with ``'uid'``, ``'compclass'`` (the class a classifier computed) and
    ``'trueclass'`` entries.  The records are grouped by uid and, for each
    utterance, the class chosen by the largest number of classifiers is
    selected.

    Returns a list with one dictionary per utterance.  Each dictionary
    starts as a copy of the module-level ``DEFAULTS`` and then receives:

    - ``'V_<class>'``: fraction of this utterance's decisions that chose
      ``<class>``, for every class seen anywhere in the input;
    - ``'compclass'``: the class with the most votes;
    - ``'trueclass'``: the true class shared by the utterance's records;
    - ``'cclassid'`` / ``'tclassid'``: results of ``_h.add`` for the
      majority and true class;
    - ``'vote'``: number of votes for the majority class;
    - ``'totalvote'``: number of decisions for the utterance;
    - ``'correctvote'``: number of votes for the true class;
    - ``'uid'``: the utterance id.

    Raises ``AssertionError`` if the records of one utterance disagree
    about its true class.
    """
    # Collect data in groups of utterance id.
    idx = index(individuals)

    # First, find the set of classes: afterwards the keys of `domain` are
    # all classes that appear in the input, each mapped to a zero count.
    domain = {}
    for decisions in idx.values():
        for decision in decisions:
            domain[decision['compclass']] = 0
            domain[decision['trueclass']] = 0

    o = []
    # .items() (rather than .iteritems()) runs on both Python 2 and 3.
    for (uid, decisions) in idx.items():
        # `scores` counts how many times each class was the computed
        # result of a first-level classifier for this utterance.
        scores = domain.copy()
        trueclass = None
        for decision in decisions:
            scores[decision['compclass']] += 1
            if trueclass is None:
                trueclass = decision['trueclass']
            else:
                assert trueclass == decision['trueclass']
        assert trueclass is not None

        # Now find the class that most first-level classifiers voted for.
        # `majorityclass` must be reset for every utterance; otherwise the
        # assertion below could be satisfied by a stale value left over
        # from the previous one.  Ties go to whichever class the
        # dictionary yields first.
        total = 0
        bestscore = 0
        majorityclass = None
        for (k, sc) in scores.items():
            total += sc
            if sc > bestscore:
                bestscore = sc
                majorityclass = k
        assert majorityclass is not None
        correctvote = scores[trueclass]

        tmp = DEFAULTS.copy()
        for (k, sc) in scores.items():
            tmp['V_%s' % k] = float(sc)/float(total)
        tmp['compclass'] = majorityclass
        tmp['trueclass'] = trueclass
        tmp['cclassid'] = _h.add(majorityclass)
        tmp['tclassid'] = _h.add(trueclass)
        tmp['vote'] = bestscore
        tmp['totalvote'] = total
        tmp['uid'] = uid
        tmp['correctvote'] = correctvote
        o.append(tmp)
    return o

if __name__ == '__main__':
    # Command line:
    #     q_classiboost [-default NAME VALUE] [-o OUTPUT] [--] INPUT
    # Reads the decisions in INPUT, takes the majority vote per utterance,
    # prints summary statistics and, with -o, writes the votes to OUTPUT.
    import sys
    # die.die() is used for fatal errors below; `die` is not among the
    # module-level imports, so bring it in here.
    from gmisclib import die

    arglist = sys.argv[1:]
    outputfile = None
    DEFAULTS = {}
    # NOTE(review): the error paths below assume die.die() terminates the
    # program -- confirm against gmisclib.
    while arglist and arglist[0].startswith('-'):
        arg = arglist.pop(0)
        if arg == '--':
            break
        elif arg == '-default':
            # Every argument that reaches this chain starts with '-', so
            # the flag has to be spelled with its leading dash to match.
            if len(arglist) < 2:
                die.die('-default requires a name and a value')
            DEFAULTS[arglist[0]] = arglist[1]
            arglist = arglist[2:]
        elif arg == '-o':
            if not arglist:
                die.die('-o requires a file name')
            outputfile = arglist.pop(0)
        else:
            die.die('Unrecognized argument: %s' % arg)
    if not arglist:
        die.die('No input file specified')
    fname = arglist.pop(0)

    # try/finally (not `with`) keeps the script usable on old Python 2.
    infd = open(fname, 'r')
    try:
        h, d, comments = fiatio.read(infd)
    finally:
        infd.close()

    votes = vote(d)
    if not votes:
        # Every ratio below would otherwise divide by zero.
        die.die('No decisions found in %s' % fname)

    correct = 0         # utterances whose majority vote is right
    total = 0           # utterances
    indicorrect = 0     # individual classifier votes for the true class
    inditotal = 0       # individual classifier votes overall
    chance = {}         # true class -> number of utterances
    for decision in votes:
        if decision['compclass'] == decision['trueclass']:
            correct += 1
        total += 1
        indicorrect += decision['correctvote']
        inditotal += decision['totalvote']
        tc = decision['trueclass']
        chance[tc] = chance.get(tc, 0) + 1

    Pcorrect = float(correct)/float(total)
    Picorrect = float(indicorrect)/float(inditotal)
    # Chance level: sum over true classes of the squared class proportion.
    Pchance = 0.0
    for c in chance.values():
        Pchance += (float(c)/float(total))**2
    if Pchance >= 1.0:
        # Only one true class present: the denominators below are zero.
        die.die('Only one true class in %s: K is undefined' % fname)
    # Agreement above chance, scaled by the maximum possible above chance.
    K = (Pcorrect-Pchance)/(1.0-Pchance)
    Ki = (Picorrect-Pchance)/(1.0-Pchance)
    out = {'K': K, 'Kindividual': Ki, 'Pcorrect': Pcorrect,
           'Individual': Picorrect, 'chance': Pchance,
           'total': total}

    # Parenthesised single argument: prints the same on Python 2 and 3.
    print(avio.concoct(out))
    if outputfile is not None:
        fd = open(outputfile, 'w')
        try:
            fiatio.write(fd, votes, hdr=h,
                         comments=comments + ['Processed by q_classiboost'])
        finally:
            # Close explicitly so the data is flushed to disk.
            fd.close()