1
2
3
"""This module reads the outputs of
qd_classifier.py, specifically classified.fiat and classes.chunk.
"""
7
8 from gmisclib import fiatio
9 from gmisclib import chunkio
10 from gmisclib import die
11 from g_classifiers import q_classifier_r as Q
12
13
14
15
16
# NOTE(review): the ``def`` line that opens this function is not visible in
# this listing, and every line carries a stray listing number.  The body only
# uses one argument, ``f``.  Restore the header from the real file.
18 """Read classified.fiat, as produced by l_classifier or qd_classifier
19 This unpacks the two columns that are packed into ASCII representations
20 and turns them back into python objects.
21 @param f: file or filename
22 @type f: file or str
23 @rtype: (dict, list(dict), list(str))
24 @return: Much like L{fiatio.read<fiatio>}, it returns a tuple of header
25 information, a list of dictionaries (each dictionary
26 corresponding to a line) and a list of comment strings.
27 """
# A str argument is treated as a filename and opened for reading.
# NOTE(review): the handle opened here is never closed in this function.
28 if isinstance(f, str):
29 f = open(f, 'r')
30 header, data, comments = fiatio.read(f)
31
# Decode the packed columns in place, row by row: 'P' is parsed with
# read_dict(float) and 'V' with read_NumArray().  A row that lacks one of
# these columns is left untouched for that column.
32 for d in data:
33 if 'P' in d:
34 d['P'] = chunkio.stringchunk(d['P']).read_dict(float)
35 if 'V' in d:
36 d['V'] = chunkio.stringchunk(d['V']).read_NumArray()
37 return (header, data, comments)
38
39
40
def model_fromchunk(chunk):
    """A factory function to read in an arbitrary classifier model.

    The name of the group that starts at the current position of C{chunk}
    selects which kind of model is built.

    @type chunk: L{chunkio.chunk}
    @param chunk: a source of tokens, positioned at the start of a model group.
    @return: the result of C{Q.qmodel.fromchunk} for a
        C{quadratic_class_model} group, or of C{Q.lmodel.fromchunk} for a
        C{linear_class_description} group.
    @raise chunkio.BadFileFormat: if the group name is neither of those.
    """
    kind = chunk.groupstart()
    if kind == 'quadratic_class_model':
        model = Q.qmodel.fromchunk(chunk)
    elif kind == 'linear_class_description':
        model = Q.lmodel.fromchunk(chunk)
    else:
        # Call form of raise: same behaviour as ``raise E, msg`` on Python 2,
        # and also valid syntax on Python 3.
        raise chunkio.BadFileFormat(
            'Unknown type of classifier model: %s' % kind)
    # Only reached when a model was read: consume the end of the group.
    chunk.groupend()
    return model
54
55
56
def read_classifier(chunk):
    """Read a classifier in from a L{gmisclib.chunkio.chunk}.

    This can yield either a quadratic or a linear classifier, depending on
    what is available in the data file.  (This is not normally called
    directly by the user.)

    One group is consumed from C{chunk}: its name, then a dictionary of
    models (each read with L{model_fromchunk}), then a dictionary of
    string-valued information.

    @type chunk: chunkio.chunk
    @param chunk: loosely, a data file.  More precisely, a source of tokens.
    @rtype: C{Q.classifier}
    @return: a single classifier, containing models for several classes.
        Its C{trainingset_name} and C{uid} are taken from the
        C{'trainingset'} and C{'Cuid'} entries of the information
        dictionary, or are C{None} when those entries are absent.
    """
    typename = chunk.groupstart()
    models = chunk.read_dict_of(model_fromchunk)
    info = chunk.read_dict(str)
    chunk.groupend()

    trainingset_name = info.get('trainingset', None)
    uid = info.get('Cuid', None)
    return Q.classifier(typename, models, info=info,
                        trainingset_name=trainingset_name,
                        uid=uid)
76
77
def read_classes(f):
    """Read classes.chunk, as produced by l_classifier or qd_classifier.

    It converts the header information to ints or floats as appropriate.
    It expects to read in a forest of equivalent classifiers, and it
    returns a list of them.  This is the normal API for reading
    classes.chunk.

    If C{f} is a filename, the file is opened here and closed again before
    returning.  A file object passed in by the caller is left open.

    @param f: a filename or a file to read
    @type f: L{str} or L{file}
    @rtype: tuple(dict(str: str, int, or float), list(some_kind_of_classifier))
    @return: the header dictionary (see L{read_classes_header}) and
        the classifiers read with L{read_classifier}.
    """
    opened_here = isinstance(f, str)
    if opened_here:
        f = open(f, 'r')
    try:
        dc = chunkio.datachunk(f)
        hdr = read_classes_header(dc)
        # NOTE(review): assumes read_array_of() consumes its input before
        # returning, so the file can be closed afterwards -- confirm.
        classilist = dc.read_array_of(read_classifier)
    finally:
        # Only close what this function opened; the caller owns any file
        # object it handed in.
        if opened_here:
            f.close()
    return (hdr, classilist)
99
100
def read_classes_header(dc):
    """Read in the header info.  This is part of the normal API.

    @param dc: a datachunk containing the classes.chunk file
        produced by a classifier run.
    @type dc: L{chunkio.datachunk}
    @rtype: dict(str: str, int, or float)
    @return: a dictionary containing the header information.
        See the docstring for the C{l_classifier} script for typical contents.
        Numbers are converted to L{float} or L{int} as appropriate;
        entries that are absent are skipped and all other entries are
        left as strings.
    @raise ValueError: if a known numeric entry cannot be converted.
    """
    hdr = dc.read_dict(str)

    # Which converter applies to which known header keys.
    conversions = (
        (int, ('nok', 'total', 'N_per_dim')),
        (float, ('Pcorrect', 'K', 'KSigma', 'PSigma', 'Chance',
                 'Perfection', 'PerfectionSigma', 'ChSigma',
                 'Ftest', 'coverage')),
    )
    for convert, keys in conversions:
        for k in keys:
            # Keep the try body to the lookup alone, so that only a
            # missing key is ignored and conversion errors still propagate.
            try:
                raw = hdr[k]
            except KeyError:
                continue
            hdr[k] = convert(raw)
    return hdr
125
126
if __name__ == '__main__':
    # Script entry point: read a classes.chunk stream from standard input
    # and print the resulting (header, classifiers) tuple.
    import sys
    # Single-argument call form: prints the same text as the Python 2
    # print statement, and is also valid on Python 3.
    print(read_classes(sys.stdin))
130