Package classifiers :: Module read_classified
[frames] | no frames]

Source Code for Module classifiers.read_classified

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """This module reads the outputs of 
  5  qd_classifier.py, specifically classified.fiat and classes.chunk. 
  6  """ 
  7   
  8  from gmisclib import fiatio 
  9  from gmisclib import chunkio 
 10  from gmisclib import die 
 11  from g_classifiers import q_classifier_r as Q 
 12  # from g_classifiers import l_classifier_guts as LC 
 13  # from g_classifiers import qd_classifier_guts as QC 
 14   
 15   
 16   
def read_classified(f):
    """Read classified.fiat, as produced by l_classifier or qd_classifier.

    The 'P' and 'V' columns are stored in the file as packed ASCII
    representations.  Wherever a row has them, they are decoded back into
    python objects: 'P' via C{chunkio.stringchunk(...).read_dict(float)}
    and 'V' via C{chunkio.stringchunk(...).read_NumArray()}.

    If C{f} is a filename, the file is opened here and closed again before
    returning.  A file object passed in by the caller is left open.

    @param f: file or filename
    @type f: file or str
    @rtype: (dict, list(dict), list(str))
    @return: Much like L{fiatio.read<fiatio>}, it returns a tuple of header
        information, a list of dictionaries (each dictionary
        corresponding to a line) and a list of comment strings.
    """
    opened_here = isinstance(f, str)
    if opened_here:
        f = open(f, 'r')
    try:
        # NOTE(review): closing right after this call assumes fiatio.read
        # consumes the whole file before returning (it hands back a list
        # that is iterated below) -- confirm against fiatio.
        header, data, comments = fiatio.read(f)
    finally:
        # Only close what this function opened; a caller-supplied file
        # object remains the caller's responsibility.
        if opened_here:
            f.close()

    for d in data:
        if 'P' in d:
            d['P'] = chunkio.stringchunk(d['P']).read_dict(float)
        if 'V' in d:
            d['V'] = chunkio.stringchunk(d['V']).read_NumArray()
    return (header, data, comments)
38 39 40
def model_fromchunk(chunk):
    """A factory function to read in an arbitrary classifier model.

    The group name returned by C{chunk.groupstart()} selects which model
    class parses the group: C{'quadratic_class_model'} is handed to
    C{Q.qmodel.fromchunk} and C{'linear_class_description'} to
    C{Q.lmodel.fromchunk}.  The group is closed with C{chunk.groupend()}
    once the model has been read.

    @type chunk: L{chunkio.chunk}
    @param chunk: the source to read the model group from.
    @return: the model object built by the selected C{fromchunk} method.
    @raise chunkio.BadFileFormat: if the group name is neither of the
        two known model types.
    """
    tmp = chunk.groupstart()
    if tmp == 'quadratic_class_model':
        rv = Q.qmodel.fromchunk(chunk)
    elif tmp == 'linear_class_description':
        rv = Q.lmodel.fromchunk(chunk)
    else:
        # Call-style raise is valid on both Python 2 and 3; the 1-tuple
        # keeps the formatting safe even if tmp is itself a tuple.
        raise chunkio.BadFileFormat(
            'Unknown type of classifier model: %s' % (tmp,))
    chunk.groupend()
    return rv
54 55 56
def read_classifier(chunk):
    """Read one classifier from a L{gmisclib.chunkio.chunk}.

    Depending on what the data file holds, the result is either a
    quadratic or a linear classifier.  (This is not normally called
    directly by the user.)

    A classifier group consists of the group name (used as the classifier
    type name), a dictionary of models read with L{model_fromchunk}, and
    a dictionary of string-valued info.  The info entries 'trainingset'
    and 'Cuid' (when present) supply the training-set name and the uid.

    @type chunk: chunkio.chunk
    @param chunk: loosely, a data file.  More precisely, a source of tokens.
    @rtype: L{l_classifier} or L{qd_classifier}
    @return: a single classifier, containing models for several classes.
    """
    kind = chunk.groupstart()
    per_class_models = chunk.read_dict_of(model_fromchunk)
    metadata = chunk.read_dict(str)
    chunk.groupend()

    # Both entries are optional in the info dictionary; absent ones
    # are passed on as None.
    training_name = metadata.get('trainingset', None)
    classifier_uid = metadata.get('Cuid', None)
    return Q.classifier(kind, per_class_models, info=metadata,
                        trainingset_name=training_name,
                        uid=classifier_uid)
76 77
def read_classes(f):
    """Read classes.chunk, as produced by l_classifier or qd_classifier.

    It converts the header information to ints or floats as appropriate.
    It expects to read in a forest of equivalent classifiers, and it
    returns a list of them.  This is the normal API for reading
    classes.chunk.

    If C{f} is a filename, the file is opened here and closed again before
    returning.  A file object passed in by the caller is left open.

    @param f: a filename or a file to read
    @type f: L{str} or L{file}
    @rtype: tuple(dict(str: str, int, or float), list(some_kind_of_classifier))
    @return: the header dictionary (see L{read_classes_header}) and the
        classifiers read from the file.
    """
    opened_here = isinstance(f, str)
    if opened_here:
        f = open(f, 'r')
    try:
        dc = chunkio.datachunk(f)
        hdr = read_classes_header(dc)
        # read_classifier handles both linear and quadratic classifiers,
        # so there is no need to dispatch on hdr['classifier_type'].
        # NOTE(review): closing the file below assumes read_array_of has
        # finished reading by the time it returns -- confirm in chunkio.
        classilist = dc.read_array_of(read_classifier)
    finally:
        # Only close what this function opened; a caller-supplied file
        # object remains the caller's responsibility.
        if opened_here:
            f.close()
    return (hdr, classilist)
99 100
def read_classes_header(dc):
    """Read in the header info.  This is part of the normal API.

    The header is read as a dictionary of strings; a fixed set of known
    numeric entries is then converted in place.  Entries that are absent
    are simply skipped, and all other entries stay strings.

    @param dc: a datachunk containing the classes.chunk file
        produced by a classifier run.
    @type dc: L{chunkio.datachunk}
    @rtype: dict(str: str, int, or float)
    @return: a dictionary containing the header information.
        See the docstring for the C{l_classifier} script for typical contents.
        Numbers are converted to L{float} or L{int} as appropriate.
    """
    header = dc.read_dict(str)

    # (converter, keys) pairs, applied in this order: integers first,
    # then floating-point values.
    conversions = (
        (int, ('nok', 'total', 'N_per_dim')),
        (float, ('Pcorrect', 'K', 'KSigma', 'PSigma', 'Chance',
                 'Perfection', 'PerfectionSigma', 'ChSigma',
                 'Ftest', 'coverage')),
    )
    for convert, names in conversions:
        for name in names:
            try:
                raw = header[name]
            except KeyError:
                # This header simply does not carry that entry.
                continue
            header[name] = convert(raw)
    return header
if __name__ == '__main__':
    # Script mode: read a classes.chunk stream from standard input and
    # print the (header, classifiers) tuple returned by read_classes.
    import sys
    # Parenthesised single-argument form prints the same thing under the
    # Python 2 print statement and the Python 3 print function.
    print(read_classes(sys.stdin))