gmisclib.matrix_rearrange

1 #!/usr/bin/env python 2 3 """This module helps you plot confusion matrices or similar 4 images where the axis labels do not have a natural order. 5 So, the probability of confusing two phonemes is a perfect example: 6 phonemes do not naturally fall onto a 1-dimensional sequence, 7 so one is free to put them in any order one likes. 8 Given that, one might as well put them into an order that 9 reveals something interesting about the probabilities. 10 11 All the functions ending in "2" work on rectangular arrays. 12 All the functions without "2" work only on square arrays, and 13 they assume that that both axes remain in the same order. 14 """ 15 16 import numpy 17 18 import die 19 import blue_data_selector 20 21 22

23 -def _mswap(sw1, sw2, m):

24 """Swap rows and columns in matrix m. 25 Produces a copy of the matrix. 26 @param m: matrix 27 @param sw1: how to re-arrange first axis 28 @param sw2: how to re-arrange second axis 29 """ 30 assert sw1.shape == (m.shape[0],) 31 assert sw2.shape == (m.shape[1],) 32 m1 = numpy.take(m, sw1, axis=0) 33 tmp = numpy.take(m1, sw2, axis=1) 34 assert tmp.shape == m.shape 35 return tmp

36 37

38 -def _lswap(swv, m):

39 """Swap labels in list m, according to mapping swv. 40 Produces a new list. 41 """ 42 assert swv.shape == (len(m),) 43 return [ m[swv[i]] for i in range(len(m)) ]

44 45 46

47 -class diagfom(object):

48 """A class that defines a figure-of-merit for a matrix. 49 It tries to put the largest entries on the diagonal. 50 Instances of this class are passed to L{symm_swap_toward_minimal_fom}. 51 """

52 - def __init__(self, n, sign=1):

53 """@param sign: 54 - If sign=1, the f-o-m returned by L{eval} 55 will be minimal when 56 the most positive matrix elements are on the diagonal. 57 - If sign=-1, you'll get minimal fom with the 58 the most positive elements off diagonal. 59 @type sign: int 60 @param n: dimension of matrix. 61 @type n: int 62 """ 63 ar = numpy.arange(n) 64 self.weight = sign*numpy.absolute(ar[:,numpy.newaxis] - ar[numpy.newaxis,:])

65

66 - def eval(self, swv, m):

67 """Evaluate the figure of merit for a matrix m with the 68 specified swap vector. 69 @param m: matrix 70 @param swv: swap vector 71 @return: figure of merit 72 @rtype: int or float (according to the type of m). 73 """ 74 m2 = _mswap(swv, swv, m) 75 # return Num.sum(Num.ravel(m2*self.weight)) 76 tmp = numpy.sum(m2*self.weight) 77 return tmp

78 79

80 -class diagfom2(object):

81 """This tries to make your matrix approximately diagonal for rectangular matrices. 82 """

83 - def __init__(self, n1, n2):

84 a1 = numpy.arange(n1) 85 a2 = numpy.arange(n2) 86 self.weight = numpy.absolute(a1[:,numpy.newaxis]/float(n1-1) - a2[numpy.newaxis,:]/float(n2-1))

87

88 - def eval(self, sw1, sw2, m):

89 m2 = _mswap(sw1, sw2, m) 90 # return Num.sum(Num.ravel(m2*self.weight)) 91 return numpy.sum(m2*self.weight)

92 93

94 -class blockfom(object):

95 """This tries to make your matrix into a block form. 96 The idea is to mimimize the sum of entry-to-entry differences along a row 97 (same with columns). 98 """

99 - def __init__(self, n1, n2):

100 pass

101

102 - def eval(self, sw1, sw2, m):

103 m2 = _mswap(sw1, sw2, m) 104 o = 0.0 105 o += numpy.sum(numpy.absolute(m2[1:,:] - m2[:-1,:])) 106 o += numpy.sum(numpy.absolute(m2[:,1:] - m2[:,:-1])) 107 return o

108 109

110 -def symm_swap_toward_minimal_fom(m, fom, maxtries):

111 """Used internally. Finds an ordering of the labels that minimizes 112 whatever C{fom} you supply. Both axes will have the same order. 113 """ 114 n = m.shape[0] 115 assert m.shape == (n, n) 116 swv = numpy.arange(n) 117 of = fom.eval(swv, m) 118 sincelast = 0 119 bds = blue_data_selector.bluedata( [ (i,j) for i in range(n) for j in range(n) if i!=j ] ) 120 if maxtries is None: 121 maxtries = 2*n*n 122 for (i,j) in bds: 123 oswv = numpy.array(swv, numpy.int, copy=True) 124 swv[i], swv[j] = (swv[j], swv[i]) 125 nf = fom.eval(swv, m) 126 if nf < of: 127 of = nf 128 sincelast = 0 129 else: 130 swv = oswv 131 sincelast += 1 132 if sincelast > maxtries: 133 break 134 return swv

135 136

137 -def swap_toward_minimal_fom(m, fom, maxtries):

138 """Used internally. Finds an ordering of the labels that minimizes 139 whatever C{fom} you supply. The ordering of the two axes may be different. 140 """ 141 n1, n2 = m.shape 142 assert m.shape == (n1, n2) 143 sw1 = numpy.arange(n1) 144 sw2 = numpy.arange(n2) 145 of = fom.eval(sw1, sw2, m) 146 sincelast = 0 147 bd1 = blue_data_selector.bluedata( [ (i,j) for i in range(n1) for j in range(n1) if i!=j ] ) 148 bd2 = blue_data_selector.bluedata( [ (i,j) for i in range(n2) for j in range(n2) if i!=j ] ) 149 if maxtries is None: 150 maxtries = 2*(n1*n1 + n2*n2) 151 else: 152 maxtries = 2*maxtries 153 while True: 154 i, j = bd1.pick(1)[0] 155 osw1 = numpy.array(sw1, numpy.int, copy=True) 156 sw1[i], sw1[j] = (sw1[j], sw1[i]) 157 nf = fom.eval(sw1, sw2, m) 158 if nf < of: 159 die.info('fom=%g->%g; sincelast=%d' % (of, nf, sincelast)) 160 of = nf 161 sincelast = 0 162 else: 163 sw1 = osw1 164 sincelast += 1 165 166 i, j = bd2.pick(1)[0] 167 osw2 = numpy.array(sw2, numpy.int, copy=True) 168 sw2[i], sw2[j] = (sw2[j], sw2[i]) 169 nf = fom.eval(sw1, sw2, m) 170 if nf < of: 171 of = nf 172 die.info('fom=%g->%g; sincelast=%d' % (of, nf, sincelast)) 173 sincelast = 0 174 else: 175 sw2 = osw2 176 sincelast += 1 177 178 i, j = bd1.pick(1)[0] 179 osw1 = numpy.array(sw1, numpy.int, copy=True) 180 sw1[i], sw1[j] = (sw1[j], sw1[i]) 181 i, j = bd2.pick(1)[0] 182 osw2 = numpy.array(sw2, numpy.int, copy=True) 183 sw2[i], sw2[j] = (sw2[j], sw2[i]) 184 nf = fom.eval(sw1, sw2, m) 185 if nf < of: 186 of = nf 187 die.info('fom=%g->%g; sincelast=%d' % (of, nf, sincelast)) 188 sincelast = 0 189 else: 190 sw1 = osw1 191 sw2 = osw2 192 sincelast += 1 193 194 if sincelast > maxtries: 195 break 196 return (sw1, sw2)

197 198 199

200 -def swap_toward_diag(m, lbls, maxtries=None, sign=1):

201 """Swap the rows and columns of a matrix to bring it closer to a diagonal 202 matrix: i.e. entries with large absolute values on the main diagonal and small entries 203 away from the main diagonal. Rows and columns are swapped together, 204 so that the ordering or rows will match the ordering of columns. 205 @param m: a 2-dimensional array 206 @type m: numpy.ndarray 207 @param lbls: a list of arbitrary labels for each row or column. 208 @type lbls: C{list(anything)} 209 @type maxtries: int or None 210 @param maxtries: how hard to work at optimizing the matrix layout? 211 None gives a resonable default value. Appropriate values are 212 a few times the number of elements in the matrix. 213 @param sign: (default=1) if C{sign=-1}, do the opposite: put the small absolute 214 values on the main diagonal. 215 @type sign: int 216 @return: A tuple containing the swapped matrix and a swapped list of values. 217 @rtype: C{tuple(numpy.ndarray, list(something))} 218 """ 219 if m.shape != (len(lbls), len(lbls)): 220 raise ValueError, "Number of labels must match the size of the matrix." 221 fom = diagfom(len(lbls), sign=sign) 222 swv = symm_swap_toward_minimal_fom(numpy.absolute(m), fom, maxtries) 223 return ( _mswap(swv, swv, m), _lswap(swv, lbls) )

224 225

226 -def swap_toward_positive(m, lbls, maxtries=None, sign=1):

227 """Swap the rows and columns of a matrix to put the most positive values on the 228 main diagonal. Rows and columns are swapped together, 229 so that the ordering or rows will match the ordering of columns. 230 @param m: a 2-dimensional array 231 @type m: numpy.ndarray 232 @param lbls: a list of arbitrary labels for each row or column. 233 @type lbls: C{list(anything)} 234 @type maxtries: int or None 235 @param maxtries: how hard to work at optimizing the matrix layout? 236 None gives a resonable default value. Appropriate values are 237 a few times the number of elements in the matrix. 238 @param sign: (default=1) if C{sign=-1}, do the opposite: put the negative entries 239 on the main diagonal. 240 @type sign: int 241 @return: A tuple containing the swapped matrix and a swapped list of labels. 242 @rtype: C{tuple(numpy.ndarray, list(something))} 243 """ 244 fom = diagfom(len(lbls), sign=sign) 245 swv = symm_swap_toward_minimal_fom(m, fom, maxtries) 246 return ( _mswap(swv, swv, m), _lswap(swv, lbls) )

247 248

249 -def swap_toward_diag2(m, lbl1, lbl2, maxtries=None):

250 """Swap the rows and columns of a matrix to make it roughly 251 diagonal: 252 i.e. large entries on the main diagonal and small entries 253 away from the main diagonal. Note that this will work 254 even if the matrix is not square. 255 @param m: a 2-dimensional array 256 @type m: numpy.ndarray 257 @param lbl1: a list of arbitrary labels for the first index of the matrix C{m}. 258 @type lbl1: C{list(anything)} 259 @param lbl2: a list of arbitrary labels for the second index of the matrix C{m}. 260 @type lbl2: C{list(anything)} 261 @type maxtries: int or None 262 @param maxtries: how hard to work at optimizing the matrix layout? 263 None gives a resonable default value. Appropriate values are 264 a few times the number of elements in the matrix. 265 @return: A tuple containing the swapped matrix and the two swapped lists of labels, 266 one for the first and one for the second axis. 267 @rtype: C{tuple(numpy.ndarray, list(something), list(something))} 268 """ 269 if m.shape != (len(lbl1), len(lbl2)): 270 raise ValueError, "Number of labels must match the size of the matrix." 271 fom = diagfom2(len(lbl1), len(lbl2)) 272 sw1, sw2 = swap_toward_minimal_fom(numpy.absolute(m), fom, maxtries) 273 return ( _mswap(sw1, sw2, m), _lswap(sw1, lbl1), _lswap(sw2, lbl2) )

274 275

276 -def pos_near_diag2(m, lbl1, lbl2, maxtries=None):

277 fom = diagfom2(len(lbl1), len(lbl2)) 278 sw1, sw2 = swap_toward_minimal_fom(m, fom, maxtries) 279 return ( _mswap(sw1, sw2, m), _lswap(sw1, lbl1), _lswap(sw2, lbl2) )

280 281

282 -def neg_near_diag2(m, lbl1, lbl2, maxtries=None):

283 fom = diagfom2(len(lbl1), len(lbl2)) 284 sw1, sw2 = swap_toward_minimal_fom(-m, fom, maxtries) 285 return ( _mswap(sw1, sw2, m), _lswap(sw1, lbl1), _lswap(sw2, lbl2) )

286 287

288 -def swap_toward_blocks2(m, lbl1, lbl2, maxtries=None):

289 """Swap rows and columns of a matrix to bring it closer to a block 290 form, where similar values occur together in blocks. 291 @param m: a 2-dimensional array 292 @type m: numpy.ndarray 293 @param lbl1: a list of arbitrary labels for the first axis of m 294 @type lbl1: C{list(anything)} 295 @param lbl2: a list of arbitrary labels for the second axis of m 296 @type lbl2: C{list(anything)} 297 @type maxtries: int or None 298 @param maxtries: how hard to work at optimizing the matrix layout? 299 None gives a resonable default value. Appropriate values are 300 a few times the number of elements in the matrix. 301 @return: A tuple containing the swapped matrix and the two swapped lists of values, 302 one for the first and one for the second axis. 303 @rtype: C{tuple(numpy.ndarray, list(something), list(something))} 304 """ 305 fom = blockfom(len(lbl1), len(lbl2)) 306 sw1, sw2 = swap_toward_minimal_fom(m, fom, maxtries) 307 return ( _mswap(sw1, sw2, m), _lswap(sw1, lbl1), _lswap(sw2, lbl2) )

308

309 -def swap_toward_blocks(m, lbls, maxtries=None):

310 """Swap rows and columns of a matrix to bring it closer to a block 311 form, where similar values occur together in blocks. It minimizes 312 the sum of changes when proceeding along a row or column. 313 @param m: a 2-dimensional array 314 @type m: numpy.ndarray 315 @param lbls: a list of arbitrary labels for the first axis of m 316 @type lbls: C{list(anything)} 317 @type maxtries: int or None 318 @param maxtries: how hard to work at optimizing the matrix layout? 319 None gives a resonable default value. Appropriate values are 320 a few times the number of elements in the matrix. 321 @return: A tuple containing the swapped matrix and a swapped lists of values. 322 @rtype: C{tuple(numpy.ndarray)}, list(something)) 323 """ 324 fom = blockfom(len(lbls), len(lbls)) 325 swv = symm_swap_toward_minimal_fom(m, fom, maxtries) 326 return ( _mswap(swv, swv, m), _lswap(swv, lbls) )

327 328

329 -def test1():

330 """Two test cases on 2x2 matrices.""" 331 m = numpy.array([[0, 1], [1, 0]]) 332 lbls = ['zero', 'one'] 333 mswapped, lswapped = swap_toward_diag(m, lbls) 334 assert lswapped == ['zero', 'one'] 335 assert numpy.equal(mswapped, numpy.array([[0, 1], [1, 0]])).all() 336 # second test case: 337 m = numpy.array([[1, 0], [0, 1]]) 338 lbls = ['zero', 'one'] 339 mswapped, lswapped = swap_toward_diag(m, lbls) 340 assert lswapped == ['zero', 'one'] 341 assert numpy.equal(mswapped, numpy.array([[1, 0], [0, 1]])).all()

342 343

344 -def test():

345 test1()

346 347 if __name__ == '__main__': 348 test() 349

Source Code for Module gmisclib.matrix_rearrange_labels