1
2
3 """This module helps you plot confusion matrices or similar
4 images where the axis labels do not have a natural order.
5 So, the probability of confusing two phonemes is a perfect example:
6 phonemes do not naturally fall onto a 1-dimensional sequence,
7 so one is free to put them in any order one likes.
8 Given that, one might as well put them into an order that
9 reveals something interesting about the probabilities.
10
11 All the functions ending in "2" work on rectangular arrays.
12 All the functions without "2" work only on square arrays, and
13 they assume that that both axes remain in the same order.
14 """
15
16 import numpy
17
18 import die
19 import blue_data_selector
20
21
22
24 """Swap rows and columns in matrix m.
25 Produces a copy of the matrix.
26 @param m: matrix
27 @param sw1: how to re-arrange first axis
28 @param sw2: how to re-arrange second axis
29 """
30 assert sw1.shape == (m.shape[0],)
31 assert sw2.shape == (m.shape[1],)
32 m1 = numpy.take(m, sw1, axis=0)
33 tmp = numpy.take(m1, sw2, axis=1)
34 assert tmp.shape == m.shape
35 return tmp
36
37
39 """Swap labels in list m, according to mapping swv.
40 Produces a new list.
41 """
42 assert swv.shape == (len(m),)
43 return [ m[swv[i]] for i in range(len(m)) ]
44
45
46
48 """A class that defines a figure-of-merit for a matrix.
49 It tries to put the largest entries on the diagonal.
50 Instances of this class are passed to L{symm_swap_toward_minimal_fom}.
51 """
53 """@param sign:
54 - If sign=1, the f-o-m returned by L{eval}
55 will be minimal when
56 the most positive matrix elements are on the diagonal.
57 - If sign=-1, you'll get minimal fom with the
58 the most positive elements off diagonal.
59 @type sign: int
60 @param n: dimension of matrix.
61 @type n: int
62 """
63 ar = numpy.arange(n)
64 self.weight = sign*numpy.absolute(ar[:,numpy.newaxis] - ar[numpy.newaxis,:])
65
66 - def eval(self, swv, m):
67 """Evaluate the figure of merit for a matrix m with the
68 specified swap vector.
69 @param m: matrix
70 @param swv: swap vector
71 @return: figure of merit
72 @rtype: int or float (according to the type of m).
73 """
74 m2 = _mswap(swv, swv, m)
75
76 tmp = numpy.sum(m2*self.weight)
77 return tmp
78
79
81 """This tries to make your matrix approximately diagonal for rectangular matrices.
82 """
87
88 - def eval(self, sw1, sw2, m):
89 m2 = _mswap(sw1, sw2, m)
90
91 return numpy.sum(m2*self.weight)
92
93
95 """This tries to make your matrix into a block form.
96 The idea is to mimimize the sum of entry-to-entry differences along a row
97 (same with columns).
98 """
101
102 - def eval(self, sw1, sw2, m):
103 m2 = _mswap(sw1, sw2, m)
104 o = 0.0
105 o += numpy.sum(numpy.absolute(m2[1:,:] - m2[:-1,:]))
106 o += numpy.sum(numpy.absolute(m2[:,1:] - m2[:,:-1]))
107 return o
108
109
111 """Used internally. Finds an ordering of the labels that minimizes
112 whatever C{fom} you supply. Both axes will have the same order.
113 """
114 n = m.shape[0]
115 assert m.shape == (n, n)
116 swv = numpy.arange(n)
117 of = fom.eval(swv, m)
118 sincelast = 0
119 bds = blue_data_selector.bluedata( [ (i,j) for i in range(n) for j in range(n) if i!=j ] )
120 if maxtries is None:
121 maxtries = 2*n*n
122 for (i,j) in bds:
123 oswv = numpy.array(swv, numpy.int, copy=True)
124 swv[i], swv[j] = (swv[j], swv[i])
125 nf = fom.eval(swv, m)
126 if nf < of:
127 of = nf
128 sincelast = 0
129 else:
130 swv = oswv
131 sincelast += 1
132 if sincelast > maxtries:
133 break
134 return swv
135
136
138 """Used internally. Finds an ordering of the labels that minimizes
139 whatever C{fom} you supply. The ordering of the two axes may be different.
140 """
141 n1, n2 = m.shape
142 assert m.shape == (n1, n2)
143 sw1 = numpy.arange(n1)
144 sw2 = numpy.arange(n2)
145 of = fom.eval(sw1, sw2, m)
146 sincelast = 0
147 bd1 = blue_data_selector.bluedata( [ (i,j) for i in range(n1) for j in range(n1) if i!=j ] )
148 bd2 = blue_data_selector.bluedata( [ (i,j) for i in range(n2) for j in range(n2) if i!=j ] )
149 if maxtries is None:
150 maxtries = 2*(n1*n1 + n2*n2)
151 else:
152 maxtries = 2*maxtries
153 while True:
154 i, j = bd1.pick(1)[0]
155 osw1 = numpy.array(sw1, numpy.int, copy=True)
156 sw1[i], sw1[j] = (sw1[j], sw1[i])
157 nf = fom.eval(sw1, sw2, m)
158 if nf < of:
159 die.info('fom=%g->%g; sincelast=%d' % (of, nf, sincelast))
160 of = nf
161 sincelast = 0
162 else:
163 sw1 = osw1
164 sincelast += 1
165
166 i, j = bd2.pick(1)[0]
167 osw2 = numpy.array(sw2, numpy.int, copy=True)
168 sw2[i], sw2[j] = (sw2[j], sw2[i])
169 nf = fom.eval(sw1, sw2, m)
170 if nf < of:
171 of = nf
172 die.info('fom=%g->%g; sincelast=%d' % (of, nf, sincelast))
173 sincelast = 0
174 else:
175 sw2 = osw2
176 sincelast += 1
177
178 i, j = bd1.pick(1)[0]
179 osw1 = numpy.array(sw1, numpy.int, copy=True)
180 sw1[i], sw1[j] = (sw1[j], sw1[i])
181 i, j = bd2.pick(1)[0]
182 osw2 = numpy.array(sw2, numpy.int, copy=True)
183 sw2[i], sw2[j] = (sw2[j], sw2[i])
184 nf = fom.eval(sw1, sw2, m)
185 if nf < of:
186 of = nf
187 die.info('fom=%g->%g; sincelast=%d' % (of, nf, sincelast))
188 sincelast = 0
189 else:
190 sw1 = osw1
191 sw2 = osw2
192 sincelast += 1
193
194 if sincelast > maxtries:
195 break
196 return (sw1, sw2)
197
198
199
201 """Swap the rows and columns of a matrix to bring it closer to a diagonal
202 matrix: i.e. entries with large absolute values on the main diagonal and small entries
203 away from the main diagonal. Rows and columns are swapped together,
204 so that the ordering or rows will match the ordering of columns.
205 @param m: a 2-dimensional array
206 @type m: numpy.ndarray
207 @param lbls: a list of arbitrary labels for each row or column.
208 @type lbls: C{list(anything)}
209 @type maxtries: int or None
210 @param maxtries: how hard to work at optimizing the matrix layout?
211 None gives a resonable default value. Appropriate values are
212 a few times the number of elements in the matrix.
213 @param sign: (default=1) if C{sign=-1}, do the opposite: put the small absolute
214 values on the main diagonal.
215 @type sign: int
216 @return: A tuple containing the swapped matrix and a swapped list of values.
217 @rtype: C{tuple(numpy.ndarray, list(something))}
218 """
219 if m.shape != (len(lbls), len(lbls)):
220 raise ValueError, "Number of labels must match the size of the matrix."
221 fom = diagfom(len(lbls), sign=sign)
222 swv = symm_swap_toward_minimal_fom(numpy.absolute(m), fom, maxtries)
223 return ( _mswap(swv, swv, m), _lswap(swv, lbls) )
224
225
227 """Swap the rows and columns of a matrix to put the most positive values on the
228 main diagonal. Rows and columns are swapped together,
229 so that the ordering or rows will match the ordering of columns.
230 @param m: a 2-dimensional array
231 @type m: numpy.ndarray
232 @param lbls: a list of arbitrary labels for each row or column.
233 @type lbls: C{list(anything)}
234 @type maxtries: int or None
235 @param maxtries: how hard to work at optimizing the matrix layout?
236 None gives a resonable default value. Appropriate values are
237 a few times the number of elements in the matrix.
238 @param sign: (default=1) if C{sign=-1}, do the opposite: put the negative entries
239 on the main diagonal.
240 @type sign: int
241 @return: A tuple containing the swapped matrix and a swapped list of labels.
242 @rtype: C{tuple(numpy.ndarray, list(something))}
243 """
244 fom = diagfom(len(lbls), sign=sign)
245 swv = symm_swap_toward_minimal_fom(m, fom, maxtries)
246 return ( _mswap(swv, swv, m), _lswap(swv, lbls) )
247
248
250 """Swap the rows and columns of a matrix to make it roughly
251 diagonal:
252 i.e. large entries on the main diagonal and small entries
253 away from the main diagonal. Note that this will work
254 even if the matrix is not square.
255 @param m: a 2-dimensional array
256 @type m: numpy.ndarray
257 @param lbl1: a list of arbitrary labels for the first index of the matrix C{m}.
258 @type lbl1: C{list(anything)}
259 @param lbl2: a list of arbitrary labels for the second index of the matrix C{m}.
260 @type lbl2: C{list(anything)}
261 @type maxtries: int or None
262 @param maxtries: how hard to work at optimizing the matrix layout?
263 None gives a resonable default value. Appropriate values are
264 a few times the number of elements in the matrix.
265 @return: A tuple containing the swapped matrix and the two swapped lists of labels,
266 one for the first and one for the second axis.
267 @rtype: C{tuple(numpy.ndarray, list(something), list(something))}
268 """
269 if m.shape != (len(lbl1), len(lbl2)):
270 raise ValueError, "Number of labels must match the size of the matrix."
271 fom = diagfom2(len(lbl1), len(lbl2))
272 sw1, sw2 = swap_toward_minimal_fom(numpy.absolute(m), fom, maxtries)
273 return ( _mswap(sw1, sw2, m), _lswap(sw1, lbl1), _lswap(sw2, lbl2) )
274
275
277 fom = diagfom2(len(lbl1), len(lbl2))
278 sw1, sw2 = swap_toward_minimal_fom(m, fom, maxtries)
279 return ( _mswap(sw1, sw2, m), _lswap(sw1, lbl1), _lswap(sw2, lbl2) )
280
281
283 fom = diagfom2(len(lbl1), len(lbl2))
284 sw1, sw2 = swap_toward_minimal_fom(-m, fom, maxtries)
285 return ( _mswap(sw1, sw2, m), _lswap(sw1, lbl1), _lswap(sw2, lbl2) )
286
287
289 """Swap rows and columns of a matrix to bring it closer to a block
290 form, where similar values occur together in blocks.
291 @param m: a 2-dimensional array
292 @type m: numpy.ndarray
293 @param lbl1: a list of arbitrary labels for the first axis of m
294 @type lbl1: C{list(anything)}
295 @param lbl2: a list of arbitrary labels for the second axis of m
296 @type lbl2: C{list(anything)}
297 @type maxtries: int or None
298 @param maxtries: how hard to work at optimizing the matrix layout?
299 None gives a resonable default value. Appropriate values are
300 a few times the number of elements in the matrix.
301 @return: A tuple containing the swapped matrix and the two swapped lists of values,
302 one for the first and one for the second axis.
303 @rtype: C{tuple(numpy.ndarray, list(something), list(something))}
304 """
305 fom = blockfom(len(lbl1), len(lbl2))
306 sw1, sw2 = swap_toward_minimal_fom(m, fom, maxtries)
307 return ( _mswap(sw1, sw2, m), _lswap(sw1, lbl1), _lswap(sw2, lbl2) )
308
310 """Swap rows and columns of a matrix to bring it closer to a block
311 form, where similar values occur together in blocks. It minimizes
312 the sum of changes when proceeding along a row or column.
313 @param m: a 2-dimensional array
314 @type m: numpy.ndarray
315 @param lbls: a list of arbitrary labels for the first axis of m
316 @type lbls: C{list(anything)}
317 @type maxtries: int or None
318 @param maxtries: how hard to work at optimizing the matrix layout?
319 None gives a resonable default value. Appropriate values are
320 a few times the number of elements in the matrix.
321 @return: A tuple containing the swapped matrix and a swapped lists of values.
322 @rtype: C{tuple(numpy.ndarray)}, list(something))
323 """
324 fom = blockfom(len(lbls), len(lbls))
325 swv = symm_swap_toward_minimal_fom(m, fom, maxtries)
326 return ( _mswap(swv, swv, m), _lswap(swv, lbls) )
327
328
330 """Two test cases on 2x2 matrices."""
331 m = numpy.array([[0, 1], [1, 0]])
332 lbls = ['zero', 'one']
333 mswapped, lswapped = swap_toward_diag(m, lbls)
334 assert lswapped == ['zero', 'one']
335 assert numpy.equal(mswapped, numpy.array([[0, 1], [1, 0]])).all()
336
337 m = numpy.array([[1, 0], [0, 1]])
338 lbls = ['zero', 'one']
339 mswapped, lswapped = swap_toward_diag(m, lbls)
340 assert lswapped == ['zero', 'one']
341 assert numpy.equal(mswapped, numpy.array([[1, 0], [0, 1]])).all()
342
343
346
347 if __name__ == '__main__':
348 test()
349