
Source Code for Module gmisclib.multivariance_mm

"""This is a helper module for multivariance.py"""

import Num
import multivariance_classes as M
import random
import g_implements

class datum_c:
    def __init__(self, vector, classid):
        self.classid = classid
        self.value = vector

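
# Added illustration (not part of the original module): a datum_c simply
# pairs a feature vector with a class label.  The label and the numbers
# below are invented for the example.
def _example_datum():
    """Build a toy datum_c; a plain Python list stands in for a Num array here."""
    return datum_c([1.0, 2.0], 'class_a')
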

def _multi_mu__init__(self, dataset=None, ndim=None, idmap=None, details=None):
    """You either give it a complete dataset to look at,
    including class IDs, *or* the dimensionality of the data
    (ndim) and a map between classids and integers.
    This map can be obtained from nice_hash.
    """
    import dictops

    if dataset is not None:
        assert details is None
        assert ndim is None and idmap is None
        import nice_hash
        assert (ndim is None) and (idmap is None)
        h = nice_hash.nice_hash(lambda x: x)
        for t in dataset:
            assert g_implements.impl(t, datum_c), \
                "Bad input type: %s" % g_implements.why(t, datum_c)
            h.add(t.classid)
        ndim = len(dataset[0].value)
        idmap = h.map()
    elif details is not None:
        assert ndim is None and idmap is None
        ndim = details.ndim()
        idmap = details.id_to_int
    assert (ndim is not None) and (idmap is not None)
    self.Nmu = len(idmap)
    self.id_to_int = idmap.copy()
    self.int_to_id = dictops.rev1to1(idmap)
    # print 'MM__init__id_to_int=', self.id_to_int
    # print 'MM__init__int_to_id=', self.int_to_id
    for i in range(len(self.int_to_id)):
        assert i in self.int_to_id, "Not a good mapping to indices."

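
# Added sketch (not part of the original module): the idmap accepted above
# maps each classid to a distinct integer in 0 .. len(idmap)-1, and
# int_to_id is its one-to-one inverse (what dictops.rev1to1 is expected to
# produce).  The classids 'a' and 'b' are invented for the example.
def _sketch_idmap():
    id_to_int = {'a': 0, 'b': 1}
    int_to_id = dict((v, k) for (k, v) in id_to_int.items())
    # Mirror the assertion in _multi_mu__init__: every index 0..Nmu-1 must
    # be present, so the two dictionaries are exact inverses of each other.
    for i in range(len(int_to_id)):
        assert i in int_to_id, "Not a good mapping to indices."
    return (id_to_int, int_to_id)
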

class multi_mu(M.modeldesc):
    __doc__ = """This describes a quadratic model of a known size,
    with multiple means
    (one for each different class of data)."""

    def __init__(self, dataset=None, ndim=None, idmap=None, details=None):
        M.modeldesc.__init__(self, ndim)
        _multi_mu__init__(self, dataset, ndim, idmap, details)
    __init__.__doc__ = _multi_mu__init__.__doc__

    def modeldim(self):
        m = self.ndim()
        return self.Nmu*m + (m*(m+1))/2

    def unpack(self, prms):
        m = self.ndim()
        assert len(prms) == self.modeldim()
        mu = {}
        for i in range(self.Nmu):
            mu[self.int_to_id[i]] = prms[i*m:(i+1)*m]
        # print 'MMunpackMu=', mu
        invsigma = Num.zeros((m, m), Num.Float)
        j = self.Nmu*m
        for i in range(m):
            tmp = prms[j:j+(m-i)]
            invsigma[i,i:] = tmp
            invsigma[i:,i] = tmp
            j += m-i
        return self.new(mu, invsigma)

    def new(self, mu, invsigma, bias=0.0):
        """Mu is a mapping of classids to vectors.  invsigma is a square matrix."""
        # print 'new(', mu, invsigma, ')'
        assert type(mu) == type({})
        # print 'NewMu=', mu
        return multi_mu_with_numbers(mu, invsigma, self, bias)

    def start(self, data):
        raise RuntimeError, 'Broken'
        import nice_hash
        h = nice_hash.nice_hash(lambda x: x.classid)
        for datum in data:
            assert g_implements.impl(datum, datum_c)
            h.add(datum)
        if len(data) > 1:
            ivar = M.diag_inv_variance([datum.value for datum in data])
        else:
            ivar = Num.identity(self.ndim())
        divar = Num.diagonal(ivar)
        rnd = {}
        var = {}
        for (k, v) in h.rmap().items():
            # print 'start: k=', k, 'v=', v, 'divar=', divar
            rnd[self.int_to_id[k]] = random.choice(v).value
            var[self.int_to_id[k]] = 1.0/divar
        # print 'ivarsize=', ivar.shape, Num.outerproduct(divar, divar).shape
        return (self.new(rnd, ivar),
                self.new(var, Num.outerproduct(divar, divar))
                )

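
# Added sketch (not part of the original module): the flat parameter vector
# that multi_mu.unpack() consumes is laid out as the Nmu mean vectors
# followed by the rows of the upper triangle of the symmetric inverse
# covariance, which is where modeldim() = Nmu*m + m*(m+1)/2 comes from.
# numpy is used here as a stand-in for Num, and integer keys stand in for
# real classids.
def _sketch_unpack_full(prms, Nmu, m):
    import numpy
    prms = numpy.asarray(prms, dtype=float)
    assert len(prms) == Nmu*m + (m*(m+1))//2
    mu = {}
    for i in range(Nmu):
        mu[i] = prms[i*m:(i+1)*m]
    invsigma = numpy.zeros((m, m))
    j = Nmu*m
    for i in range(m):
        row = prms[j:j+(m-i)]
        invsigma[i, i:] = row   # fill row i from the diagonal rightwards...
        invsigma[i:, i] = row   # ...and mirror it into column i
        j += m - i
    return (mu, invsigma)
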

class multi_mu_with_numbers(M.model_with_numbers):
    def __init__(self, mu, invsigma, details, bias=0.0, offset=None):
        """self.mu, self.invsigma, and self._offset should be considered
        read-only for all users of this class."""
        assert isinstance(details, multi_mu)
        M.model_with_numbers.__init__(self, details, bias)
        self.mu = Num.array(mu, copy=True)
        # print 'MMmu=', mu
        # print 'invsigma.shape=', invsigma.shape
        self.invsigma = Num.array(invsigma)
        self._offset = offset

    def __str__(self):
        return '<multi_mu: mu=%s; invsigma=%s >' % (str(self.mu), str(self.invsigma))

    __repr__ = __str__

    addoff = M._q_addoff        # Will not be called if _offset is not None

    def pack(self):
        n = self.ndim()
        # print 'invsigma.shape=', self.invsigma.shape, 'n=', n
        assert self.invsigma.shape == (n,n)
        assert len(self.mu) == self.desc.Nmu
        tmp = []
        # print 'self.mu=', self.mu
        for i in range(self.desc.Nmu):
            tmp.append( self.mu[self.desc.int_to_id[i]] )
        for i in range(n):
            tmp.append(self.invsigma[i,i:])
        # print 'Pack tmp=', tmp
        return Num.concatenate(tmp)

    def logp(self, datum):
        delta = datum.value - self.mu[datum.classid]
        parab = Num.dot(delta, Num.matrixmultiply(self.invsigma, delta))
        if not parab >= 0.0:
            raise M.QuadraticNotNormalizable, "Not positive-definite"
        return -parab/2.0 + self.offset() + self.bias

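
# Added sketches (not part of the original module): pack() writes the means
# and then the upper-triangle rows in the same order that unpack() reads
# them, and logp() is the quadratic form -delta' * invsigma * delta / 2
# plus a normalization offset and bias (both omitted below).  numpy stands
# in for Num; the numbers and the single integer classid are invented.
def _sketch_pack_roundtrip():
    import numpy
    mu = {0: numpy.array([1.0, 2.0])}
    invsigma = numpy.array([[2.0, 0.5], [0.5, 1.0]])
    m = invsigma.shape[0]
    pieces = [mu[0]]
    for i in range(m):
        pieces.append(invsigma[i, i:])
    packed = numpy.concatenate(pieces)
    # Decode with the _sketch_unpack_full helper added above and check that
    # the mean and the symmetric matrix come back unchanged.
    mu2, invsigma2 = _sketch_unpack_full(packed, 1, m)
    assert numpy.allclose(mu2[0], mu[0])
    assert numpy.allclose(invsigma2, invsigma)
    return packed


def _sketch_logp_full(value, mu, invsigma):
    import numpy
    delta = numpy.asarray(value, dtype=float) - numpy.asarray(mu, dtype=float)
    parab = numpy.dot(delta, numpy.dot(invsigma, delta))
    assert parab >= 0.0         # invsigma must be positive (semi-)definite
    return -parab/2.0
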

class multi_mu_diag(M.modeldesc):
    __doc__ = """This describes a quadratic model of a known size,
    with multiple means (one for each different class of data).
    The covariance matrix is shared and diagonal."""

    def __init__(self, dataset=None, ndim=None, idmap=None, details=None):
        M.modeldesc.__init__(self, ndim)
        _multi_mu__init__(self, dataset, ndim, idmap, details)
    __init__.__doc__ = _multi_mu__init__.__doc__

    def modeldim(self):
        m = self.ndim()
        return self.Nmu*m + m

    def unpack(self, prms):
        m = self.ndim()
        assert len(prms) == self.modeldim()
        mu = {}
        for i in range(self.Nmu):
            mu[self.int_to_id[i]] = prms[i*m:(i+1)*m]
        # print 'MMunpackMu=', mu
        j = self.Nmu*m
        invsigma = prms[j:]
        return self.new(mu, invsigma)

    def new(self, mu, invsigma, bias=0.0):
        """Mu is a mapping of classids to vectors.  Invsigma is a vector."""
        assert type(mu) == type({})
        return multi_mu_diag_with_numbers(mu, invsigma, self, bias)

    def start(self, data):
        raise RuntimeError, 'Broken'
        import nice_hash
        h = nice_hash.nice_hash(lambda x: x.classid)
        for datum in data:
            assert g_implements.impl(datum, datum_c)
            h.add(datum)
        if len(data) > 1:
            divar = M.vec_inv_variance([datum.value for datum in data])
        else:
            divar = Num.ones((self.ndim(),), Num.Float)
        rnd = {}
        var = {}
        for (k, v) in h.rmap().items():
            # print 'start: k=', k, 'v=', v, 'divar=', divar
            rnd[self.int_to_id[k]] = random.choice(v).value
            var[self.int_to_id[k]] = 1.0/divar
        return (self.new(rnd, divar),
                self.new(var, divar*divar)
                )

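
# Added sketch (not part of the original module): for the diagonal model the
# flat parameter vector is just the Nmu mean vectors followed by the m
# diagonal entries of the inverse covariance, hence modeldim() = Nmu*m + m.
# numpy stands in for Num and integer keys stand in for real classids.
def _sketch_unpack_diag(prms, Nmu, m):
    import numpy
    prms = numpy.asarray(prms, dtype=float)
    assert len(prms) == Nmu*m + m
    mu = {}
    for i in range(Nmu):
        mu[i] = prms[i*m:(i+1)*m]
    invsigma = prms[Nmu*m:]     # the shared diagonal, as a length-m vector
    return (mu, invsigma)
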

class multi_mu_diag_with_numbers(M.model_with_numbers):
    def __init__(self, mu, invsigma, details, bias=0.0, offset=None):
        """self.mu, self.invsigma, and self._offset should be considered
        read-only for all users of this class."""
        assert isinstance(details, multi_mu_diag)
        M.model_with_numbers.__init__(self, details, bias)
        self.mu = Num.array(mu)
        # print 'MMmu=', mu
        # print 'invsigma.shape=', invsigma.shape
        self.invsigma = Num.array(invsigma)
        self._offset = offset

    def __str__(self):
        return '<multi_mu_diag: mu=%s; invsigma=%s >' % (str(self.mu), str(self.invsigma))

    __repr__ = __str__

    addoff = M._d_addoff        # Will not be called if _offset is not None

    def pack(self):
        n = self.ndim()
        # print 'invsigma.shape=', self.invsigma.shape, 'n=', n
        assert self.invsigma.shape == (n,)
        assert len(self.mu) == self.desc.Nmu
        tmp = []
        # print 'self.mu=', self.mu
        for i in range(self.desc.Nmu):
            tmp.append( self.mu[self.desc.int_to_id[i]] )
        tmp.append( self.invsigma )
        # print 'Pack tmp=', tmp
        return Num.concatenate(tmp)

    def logp(self, datum):
        delta = datum.value - self.mu[datum.classid]
        parab = Num.sum(self.invsigma * delta**2)
        if not parab >= 0.0:
            raise M.QuadraticNotNormalizable, "Not positive-definite"
        return -parab/2.0 + self.offset() + self.bias

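
# Added sketch (not part of the original module): with a diagonal inverse
# covariance the quadratic form in logp() reduces to a weighted sum of
# squared per-dimension deviations; the normalization offset and bias are
# omitted here.  numpy stands in for Num.
def _sketch_logp_diag(value, mu, invsigma_diag):
    import numpy
    delta = numpy.asarray(value, dtype=float) - numpy.asarray(mu, dtype=float)
    parab = numpy.sum(numpy.asarray(invsigma_diag, dtype=float) * delta**2)
    assert parab >= 0.0         # every diagonal entry must be >= 0
    return -parab/2.0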