
Source Code for Module gmisclib.mcmc

   1  """Bootstrap Markov-Chain Monte-Carlo algorithm. 
   2  This can be used to generate samples from a probability distribution, 
   3  or as a simulated annealing algorithm for maximization. 
   4  It is intended to be imported so that its functions and classes can be used directly. 
   5   
   6  The central interface is the L{BootStepper} class; within it, 
   7  the L{BootStepper.step} method is called iteratively to take Markov steps. 
   8  To use this module, the user should derive a class from the L{problem_definition} 
   9  class and redefine (at least) the L{problem_definition.logp} method. 
  10   
  11  It was originally inspired by amoeba_anneal (in Numerical Recipes, Press et al.). 
  12  The essential feature is that it keeps a large archive of previous positions 
  13  (possibly many times more than M{N} of them). It samples two positions 
  14  from the archive and subtracts them to generate candidate steps. 
  15  This has the nice property that when sampling from a multivariate 
  16  Gaussian distribution, the candidate steps match the distribution nicely. 
  17   
  18  It can be operated in two modes (or set to automatically switch). 
  19  One is optimization mode where it heads for the maximum of the probability 
  20  distribution.   The other mode is sampling mode, where it 
  21  asymptotically follows a Markov sampling procedure and has the 
  22  proper statistical properties. 
  23   
  24  The algorithm has been described in: 
  25   
  26          - ``Bootstrap Markov chain Monte Carlo and optimal solutions for the 
  27          Law of Categorical Judgment (Corrected)'' Greg Kochanski and Burton S. Rosner (2010), 
   28          I{arXiv:1008.1596}, available from U{http://arxiv.org/abs/1008.1596}. 
  29   
  30  An earlier version has been used in: 
  31   
  32          - ``Rhythm measures and dimensions of durational variation in speech'', 
   33          Anastassia Loukina, Greg Kochanski, Burton Rosner, Elinor Keane, and Chilin Shih 
  34          (Submitted 2010 to J. Acoustical Society of America). 
  35          Preprint available at U{http://kochanski.org/gpk/papers/2010/classifier.pdf}. 
  36   
  37          - ``Image quality in non-gated versus gated reconstruction of tongue motion using magnetic resonance imaging: 
  38          a comparison using automated image processing'', 
  39          Christopher Alvey, C. Orphanidou, J. Coleman, A. McIntyre, S. Golding and G. Kochanski, 
  40          I{Intl. J. of Computer Assisted Radiology and Surgery} v3(5), 2008, pp. 457--464, 
  41          U{doi:10.1007/s11548-008-0218-5 <http://dx.doi.org/10.1007/s11548-008-0218-5>}. 
  42   
  43          - ``Evidence for attractors in English intonation'', 
  44          Braun, B., Kochanski, G., Grabe, E., Rosner, B. S., 
  45          2006, I{J. Acoustical Society of America} 119(6) 4006--4015 
  46          U{http://dx.doi.org/10.1121/1.2195267}. 
  47  """ 
  48  from __future__ import with_statement 
  49   
  50  import math 
  51  import types 
  52  import random 
  53  import bisect 
  54  import threading 
  55   
  56  import numpy 
  57  numpy.seterr(invalid="raise") 
  58   
  59  import die 
  60  import gpkmisc 
  61  import g_implements 
  62  import multivariate_normal as MVN 
  63   
  64  Debug = 0 
  65  MEMORYS_WORTH_OF_PARAMETERS = 1e8       #: How many bytes can we use to store the archives? 
66 67 68 69 -class problem_definition(object):
70 """This class implements the problem to be solved. 71 It's main function in life is to compute the probability that a given 72 parameter vector is acceptable. Mcmc.py then uses that to run a 73 Markov-Chain Monte-Carlo sampling. 74 You probably want to derive a class from this and override most of the functions defined here, 75 or implement your own class with the same functions. 76 (Of course, in either case, it can contain extra functions also.) 77 78 For instance, it can be a good idea to define a function 79 "model" that computes the model that you are fitting to data 80 (if that is your plan). Then C{logp()} can be something 81 like C{return -numpy.sum((self.model()-self.data)**2)}. 82 Also, it can be good to define a "guess" function that computes 83 a reasonable initial guess to the parameters, somehow. 84 """ 85 86 @g_implements.make_optional 87 @g_implements.make_varargs
88 - def __init__(self):
89 pass
90
91 - def logp(self, x):
92 """Compute the log of the probability density at C{x}. 93 @param x:a parameter vector 94 @type x: numpy.ndarray 95 @return: The log of the probability that the model assigns to parameters x. 96 @rtype: float 97 @raise NotGoodPosition: This exception is used to indicate that the position 98 x is not valid. This is equivalent to returning an extremely negative 99 logp. 100 """ 101 raise RuntimeError, "Virtual method"
102
103 - def fixer(self, x):
104 """This is called on each candidate position 105 vector. Generally, it is used to restrict the possible 106 solution space by folding position vectors that escape outside the solution space back into 107 the solution space. It can also allow for symmetries in equations. 108 109 Formally, it defines a convex region. All vectors outside the region are mapped 110 into the region, and the mapping must be continuous at the boundary. 111 (More precisely, C{logp(fixer(x))} must be continuous everywhere that C{logp(x)} is continuous, 112 including the boundary.) For instance, mapping C{x[0]} into C{abs(x[0])} defines a convex region 113 (the positive half-space), and the mapping is continuous near C{x[0]=0}. 114 115 Additionally, it may re-normalize parameters at will subject to the restriction 116 that C{logp(fixer(x))==logp(x)}. 117 For instance, it can implement a constraint that C{sum(x)==0} by mapping 118 C{x} into C{x - average(x)}, so long as the value of C{logp()} is unaffected by that 119 substitution. Other folds can sometimes lead to problems. 120 121 @param x:a parameter vector 122 @type x: numpy.ndarray 123 @return: a (possibly modified) parameter vector. 124 @rtype: numpy.ndarray 125 @attention: Within a convex region (presumably one that contains the optimal C{x}), 126 fixer must I{not} change the value of logp(): logp(fixer(x)) == logp(x). 127 @raise NotGoodPosition: This exception is used to indicate that the position 128 x is not valid. Fixer has the option of either 129 mapping invalid parameter vectors into valid ones 130 or raising this exception. 131 """ 132 return x
133 134 135 @g_implements.make_optional
136 - def log(self, p, i):
137 """Some code calls this function every iteration 138 to log the current state of the MCMC process. 139 @param p: the current parameter vector, and 140 @param i: an integer iteration counter. 141 @return: nothing. 142 """ 143 raise RuntimeError, "Virtual method"
144
145 # @g_implements.make_optional 146 # def guess(self): 147 # """Some code calls this function at the beginning 148 # of the MCMC process to seed the iteration. 149 # @return: a list of guess vectors; each guess vector is 150 # suitable for passing to self.fixer() or self.logp() as x. 151 # """ 152 # raise RuntimeError, "Virtual method" 153 154 155 -class problem_definition_F(problem_definition):
156 """This is a variant that is used bin the bin/mcmc.py script. 157 """ 158 @g_implements.make_optional 159 @g_implements.make_varargs
160 - def __init__(self, logp_fcn, c, fixer=None):
161 problem_definition.__init__(self) 162 self.c = c 163 self._fixer = fixer 164 # assert logp_fcn is None or callable(logp_fcn) 165 assert callable(logp_fcn) 166 self.lpf = logp_fcn
167
168 - def logp(self, x):
169 return (self.lpf)(x, self.c)
170 171 logp.__doc__ = problem_definition.logp.__doc__ 172 173
174 - def fixer(self, x):
175 if self._fixer is not None: 176 return (self._fixer)(x, self.c) 177 return x
178 179 fixer.__doc__ = problem_definition.fixer.__doc__
180 181 182 183 184 185 _Ntype = type(numpy.zeros((1,), numpy.float)) #: Used for type checking.
186 187 188 189 -class position_base(object):
190 """This class is used internally in the MCMC sampling process to 191 represent a position. It stores a parameter vector, a reference to 192 the problem definition, and (optionally) caches computed values 193 of log(P). 194 """ 195 @g_implements.make_optional
196 - def __init__(self, x, problem_def):
197 """You need this function and 198 this signature if you are going to pass 199 it to make_list_of_positions(), but not necessarily 200 otherwise. 201 """ 202 g_implements.check(problem_def, problem_definition) 203 self.pd = problem_def 204 # print "position:x=", x 205 tmp = numpy.array(x, numpy.float, copy=True) 206 tmp = self.pd.fixer(tmp) 207 assert isinstance(tmp, _Ntype), "Output of fixer() is not numpy array." 208 assert tmp.shape == x.shape, "Fixer output[%s] must match shape of input[%s]." % (str(tmp.shape), str(x.shape)) 209 # self.x = numpy.array(tmp, copy=True) 210 self.x = tmp 211 self._uid = hash(self.vec().tostring())
212 213 214 # def __deepcopy__(self, memo): 215 # """Don't copy any deeper than this.""" 216 # return position_base(self.x, self.pd) 217 218
219 - def logp(self):
220 """Compute the log of the probability for the position. 221 """ 222 raise RuntimeError, "Virtual Function"
223 224
225 - def logp_nocompute(self):
226 """Shows a recent logp value. It should not compute 227 a value unless necessary. The intent is to look up 228 a value in a cache. 229 """ 230 raise RuntimeError, "Virtual Function"
231 232
233 - def new(self, shift, logp=None):
234 """Returns a new C{position}, shifted by the specified amount. 235 @param shift: How much of a move to make to the new position? 236 @type shift: numpy.ndarray 237 @param logp: (optional) If this is supplied, is is used to set the 238 C{log(P)} value for the newly created position structure. 239 @type logp: float or None 240 @return: a new position 241 @rtype: L{position_base} or a subclass 242 """ 243 raise RuntimeError, "Virtual Function"
244 245
246 - def prms(self):
247 """The result of this function is to be handed to problem_definition.logp(). 248 This result must contain all the information specifying the position. 249 Normally, this is a vector of floats, but conceivably, you could include other information. 250 """ 251 return self.x
252 253
254 - def vec(self):
255 """Returns a numpy vector. 256 The result should contain all the information specifying the position; 257 if not all, it should at least contain all the information that can be 258 usefully expressed as a vector of floating point numbers. 259 260 Normally, L{vec}() and L{prms}() are identical. 261 """ 262 return self.x
263 264 265 @g_implements.make_optional
266 - def __repr__(self):
267 return '<POSbase %s>' % str(self.x)
268 269
270 - def __cmp__(self, other):
271 """This is used when the archive is sorted.""" 272 dd = 0 273 try: 274 a = self.logp_nocompute() 275 except NotGoodPosition: 276 dd -= 2 277 a = 0.0 # only relevant when both are uncomputable. 278 try: 279 b = other.logp_nocompute() 280 except NotGoodPosition: 281 dd += 2 282 b = 0.0 # only relevant when both are uncomputable. 283 return dd or cmp(a, b)
284 285
286 - def uid(self):
287 return self._uid
288
289 290 -class position_repeatable(position_base):
291 """This is for the common case where logp is a well-behaved 292 function of its arguments. It caches positions and their corresponding 293 values of C{log(P)}. 294 """ 295 EPS = 1e-7 #: When we check for reproducibility, how closely must answers match? 296 HUGE = 1e38 297 CACHE_LIFE = 50 298 FIXER_CHECK = 100 299 """How often should we recompute cached values to make sure that the 300 same parameters always lead to the same results?""" 301 # CACHE_LIFE = 1 # For debugging only. 302 303 @g_implements.make_optional
304 - def __init__(self, x, problem_def, logp=None):
305 """ 306 @param x: 307 @type x: numpy.ndarray 308 @type problem_def: L{problem_definition} or a subclass thereof. 309 @type logp: float or None 310 @param logp: the value of M{log(P)} at M{x}, or C{None} to indicate that it 311 hasn't been computed yet. 312 @raise ValueError: if sanity check is failed. 313 @raise L{NotGoodPosition}: from inside L{problem_definition.logp}. 314 """ 315 position_base.__init__(self, x, problem_def) 316 if logp is not None and not (logp < self.HUGE): 317 raise ValueError("Absurdly large value of logP", logp, self.x) 318 self.cache = logp 319 if logp is None: 320 self.cache_valid = -1 321 else: 322 self.cache_valid = self.CACHE_LIFE 323 if random.random()*self.FIXER_CHECK < 1.0: 324 # This is a sanity check: 1% of the time we make sure that 325 # the supplied value of log(P) is consistent with what is yielded by L{problem_def}. 326 # Note that self.pd.fixer() gets called inside position_base inside 327 # position_repeatable. 328 tmp = position_repeatable(self.x, problem_def) 329 tlp = tmp.logp() 330 slp = self.logp() 331 if abs(tlp - slp) > 0.1 + self.EPS*abs(tlp+slp): 332 # Ideally, EPS will be zero. But, you sometimes get roundoff 333 # errors in the fixer early on in an optimization when the fit 334 # is still awful. So, it might allow past some non-idempotent functions, 335 # but only for a set of parameters that are so wild and awful 336 # that no one should really cares. This is a bit of a kluge, but not a bad one. 337 die.info("Parameters before fixer: %s" % self.x) 338 die.info("Parameters after fixer: %s" % tmp.x) 339 ibgst = numpy.argmax(numpy.absolute(self.x-tmp.x)).item() 340 die.info("Biggest difference is prm[%d]: %s -> %s" % (ibgst, self.x[ibgst], tmp.x[ibgst])) 341 die.warn("Logp changes from %s to %s around fixer." % (self.logp_nocompute(), tmp.logp_nocompute())) 342 die.info("Change on recomputation: %s -> %s" % (self.logp_nocompute(), self.logp())) 343 raise ValueError, "Fixer is not idempotent. Logp changes from %s to %s." % (self.logp_nocompute(), tmp.logp_nocompute())
344 345 346
347 - def invalidate_cache(self):
348 """This can be called when the mapping between parameters (x) 349 and value changes. You might use it if you wanted to 350 change the probability distribution (i.e. M{log(P)}). 351 """ 352 self.cache_valid = -1
353 354
355 - def logp(self):
356 if self.cache_valid <= 0: 357 try: 358 logp = self.pd.logp(self.prms()) 359 except NotGoodPosition: 360 logp = None 361 362 if logp is not None and not (logp < self.HUGE): 363 raise ValueError("Absurdly large value of logP", logp, self.x) 364 if(self.cache_valid==0 and 365 ((self.cache is None)!=(logp is None) 366 or 367 abs(logp - self.cache) > 0.1 + 1e-8*(abs(logp)+abs(self.cache)) 368 )): 369 raise ValueError, 'Recomputing position cache; found mismatch %s to %s' % (self.cache, logp) 370 self.cache = logp 371 self.cache_valid = self.CACHE_LIFE 372 self.cache_valid -= 1 373 if self.cache is None: 374 raise NotGoodPosition 375 return self.cache
376 377
378 - def logp_nocompute(self):
379 if self.cache_valid < 0: 380 return self.logp() 381 if self.cache is None: 382 raise NotGoodPosition 383 return self.cache
384 385
386 - def new(self, shift, logp=None):
387 """Returns a new position, shifted by the specified amount.""" 388 return position_repeatable(self.vec() + shift, self.pd, logp=logp)
389 390
391 - def __repr__(self):
392 if self.cache_valid > 0: 393 s = str(self.cache) 394 elif self.cache_valid == 0: 395 s = "<cache expired>" 396 else: 397 s = "<uncomputed>" 398 return '<POSr %s -> %s>' % (str(self.prms()), s)
399
400 401 402 -class position_nonrepeatable(position_base):
403 """This is for the (unfortunately common) case where logp 404 is an indpendent random function of its arguments. It does 405 not cache as much as L{position_repeatable}. 406 Arguments are analogous to L{position_repeatable}. 407 @note: I think this class is obsolete, given the call to L{T_acceptor.probe}() that is not inserted in to 408 L{BootStepper.step}(). 409 """ 410 HUGE = 1e38 411 412 @g_implements.make_optional
413 - def __init__(self, x, problem_def, logp=None):
414 position_base.__init__(self, x, problem_def) 415 if logp is None: 416 self.cache = [] 417 else: 418 if not (logp < self.HUGE): 419 raise ValueError("Absurdly large value of logP", logp, self.x) 420 self.cache = [ logp ] 421 self.CSIZE = 5
422 423 424
425 - def logp(self):
426 if random.random() > self.CSIZE / float(self.CSIZE + len(self.cache)): 427 logp = random.choice(self.cache) 428 else: 429 try: 430 logp = self.pd.logp(self.prms()) 431 except NotGoodPosition: 432 logp = None 433 434 if not (logp is None or logp < self.HUGE): 435 raise ValueError("Absurdly large value of logP", logp, self.x) 436 self.cache.append(logp) 437 if logp is None: 438 raise NotGoodPosition 439 return logp
440 441
442 - def logp_nocompute(self):
443 if self.cache: 444 tmp = random.choice(self.cache) 445 if tmp is None: 446 raise NotGoodPosition 447 else: 448 tmp = self.logp() 449 return tmp
450 451
452 - def new(self, shift, logp=None):
453 """Returns a new position, shifted by the specified amount.""" 454 return position_nonrepeatable(self.vec() + shift, self.pd, logp=logp)
455 456
457 - def __repr__(self):
458 if len(self.cache): 459 s1 = "" 460 mn = None 461 mx = None 462 for q in self.cache: 463 if q is None: 464 s1 = "BAD or " 465 else: 466 if mx is None or q>mx: 467 mx = q 468 if mn is None or q<mn: 469 mn = q 470 s = "%s%g to %g" % (s1, mn, mx) 471 else: 472 s = "<uncomputed>" 473 return '<POSnr %s -> %s>' % (str(self.prms()), s)
474
475 476 477 # class _empty(object): 478 # """Just a singleton marker.""" 479 # pass 480 481 482 -class acceptor_base(object):
483 """@ivar last_probe: A pair of L{position_base} instances showing the position and logP 484 at each end of the most recent probe for a discontinuity. Or, it could be None. 485 This value is not used in mcmc,py. Rather, it is there for the logging modules. 486 """ 487
488 - def __init__(self):
489 self.last_probe = None
490
491 - def T(self):
492 """@return: A decent approximation to the system temperature. 493 @rtype: L{float} 494 """ 495 raise RuntimeError, "Virtual Method"
496
497 - def __call__(self, delta):
498 """Accept a step or not? 499 @param delta: The proposed step gives C{delta} as the change in 500 M{log(probability)}. 501 @type delta: float 502 @return: should it be accepted or not? 503 @rtype: C{bool} 504 """ 505 raise RuntimeError, "Virtual Method"
506
507 - def probe(self, currentpos, stepper):
508 """This is a hook for a special case where you are optimizing a random 509 function or one that is deterministic, but very rough, and you don't 510 care about the small-scale structure. In such a case, you might use 511 this to probe the local variability and use that knowledge to change 512 the temperature. 513 """ 514 return None
515
516 - def jitter(self):
517 """In the acoustic distances paper, it is called C{$\\bar{delta}$}. 518 @return: An estimate of how rough the function is, on a small scale. 519 @rtype: L{float} 520 """ 521 return 0.0;
522
523 - def reset(self):
524 """In order to make the overal algorithm asymptotically a Markovian, 525 we need to make sure that (asymptotically) the acceptor function 526 depends only on long-term averages, not on recent history. 527 The problem is the jitter measurement, of course. 528 So, one needs to average the jitter over a time longer than the 529 recurrence time, asymptotically. But that makes it horribly 530 unresponsive in the early stages of the optimization when things 531 are rapidly changing. The solution is to pass along resets, 532 so that it can (temporarily) revert to a short averaging time. 533 """ 534 pass
535
536 537 -class T_acceptor(acceptor_base):
538 """This class implements a normal Metropolis-Hastings 539 acceptance of steps. 540 """
541 - def __init__(self, T=1.0):
542 """You can change the temperature to do simulated annealing. 543 @param T: temperature 544 @type T: float 545 """ 546 acceptor_base.__init__(self) 547 assert T >= 0.0 548 self._T = T
549
550 - def T(self):
551 """@return: the system temperature. 552 @rtype: L{float} 553 """ 554 return self._T
555
556 - def __call__(self, delta):
557 """Accept a step or not? 558 @param delta: The proposed step gives C{delta} as the change in 559 M{log(probability)}. 560 @type delta: float 561 @return: should it be accepted or not? 562 @rtype: C{bool} 563 """ 564 return delta > -self.T()*random.expovariate(1.0)
565
566 567 -class rough_acceptor_base(acceptor_base):
568 """@ivar since_last_reset: Number of iterations since the last overall MCMC reset. 569 """
570 - def probe(self, currentpos, stepper):
571 """This computes logP() near the current step position, and sees how 572 much that differs from logP() _at_ the current step position. 573 That difference is used as a measure of the "roughness" or "randomness" 574 of the function. That extra roughness is added onto the temperature 575 to ensure that the MCMC stepper doesn't get trapped in an unimportant 576 local valley. Essentially, we are making the statement that any 577 differences within the region covered by L{probestep} are unimportant. 578 """ 579 if self._probestep is None: 580 return None 581 # Note that it is *critical* that the steps generated below have a zero mean. 582 # The random multiplication is intended to enforce that, even if the user 583 # is lazy and doesn't generate good steps. 584 # If the mean step is nonzero, it's quite possible to drive the optimization off 585 # to infinity. 586 tmp = currentpos.new(self._probestep(stepper) * (1.0 if random.random()<0.5 else -1.0)) 587 delta = abs(tmp.logp() - currentpos.logp_nocompute()) 588 f = (self.tau+self.since_last_reset)/self.tau 589 self._jitter = ( max(delta, self._jitter) + f*self._jitter + self.droop*delta) / (1 + f + self.droop) 590 die.info("Probe: jitter adjusted to %g because of %g difference" % (self._jitter, delta)) 591 self.since_last_reset += 1 592 self.last_probe = (currentpos, tmp) 593 return tmp
594
595 - def jitter(self):
596 return self._jitter
597
598 - def reset(self):
599 self.since_last_reset = 1
600
601 - def __init__(self, probestep, tau, droop):
602 acceptor_base.__init__(self) 603 self._jitter = 0.0 604 self._probestep = probestep 605 self.tau = tau if tau is not None else 10.0 606 self.droop = droop if droop is not None else 0.1 607 assert self.tau > 0.0 608 assert self.droop >= 0.0 609 self.since_last_reset = 1
610
611 612 -class rough_T_acceptor(rough_acceptor_base):
613 """This class implements a normal Metropolis-Hastings 614 acceptance of steps. 615 """
616 - def __init__(self, probestep, tau_probe=None, T=1.0, jitterdroop=None):
617 """You can change the temperature to do simulated annealing. 618 @param T: temperature 619 @type T: float 620 @param probestep: something callable that generates small offsets within a voxel. 621 This is called with the currently active stepper as an argument. 622 @type probestep: function(L{stepper}) -> numpy.ndarray (length=L{BootStepper.np}). 623 """ 624 rough_acceptor_base.__init__(self, probestep, tau_probe, jitterdroop) 625 assert T >= 0.0 626 self._T0 = T
627 628
629 - def T(self):
630 """@return: the system temperature. 631 @rtype: L{float} 632 """ 633 return math.hypot(self._T0, self._jitter)
634
635 - def __call__(self, delta):
636 """Accept a step or not? 637 @param delta: The proposed step gives C{delta} as the change in 638 M{log(probability)}. 639 @type delta: float 640 @return: should it be accepted or not? 641 @rtype: C{bool} 642 """ 643 return delta > -self.T()*random.expovariate(1.0)
644
645 646 647 648 649 -class stepper(object):
650 """This is your basic stepper class. It's a base class containing common 651 features; it is not used directly. 652 @ivar last_failed: It should reflect the success or failure of the most recently 653 completed step. It is None if the last step succeeded; it is the 654 most recently rejected L{position_base} object if the last step failed. 655 This is intended for debugging mostly. It is not used by code in this module. 656 """ 657 658 @g_implements.make_varargs
659 - def __init__(self):
660 self.since_last_rst = 0 661 self.resetid = 0 662 self.last_reset = None 663 self.last_failed = None 664 self.lock = threading.RLock() 665 self.acceptable = T_acceptor(1.0) 666 """Acceptable is a function that decides whether or not a step is OK. 667 You can replace it if you want, but the class should be equivalent 668 to L{T_acceptor}."""
669 670
671 - def step(self):
672 """In subclasses, this takes a step and returns 0 or 1, 673 depending on whether the step was accepted or not.""" 674 self.since_last_rst += 1 675 return None
676 677
678 - def prms(self):
679 """@return: The current parameters 680 @rtype: C{numpy.ndarray} 681 """ 682 return self.current().prms()
683 684
685 - def status(self):
686 """Provides some printable status information in a=v; format.""" 687 raise RuntimeError, "Virtual Function"
688 689
690 - def reset(self):
691 """Called internally to mark when the optimization 692 has found a new minimum. 693 @note: You might also call it if the function you are minimizing changes. 694 """ 695 # print 'stepper.reset', threading.currentThread().getName() 696 with self.lock: 697 self.since_last_rst = 0 698 self.resetid += 1 699 self.acceptable.reset()
700
701 - def reset_id(self):
702 """Use this to tell if the stepper has been reset since you last 703 looked at it. 704 @return: an integer that's different for each reset. 705 @rtype: int 706 """ 707 return self.resetid
708
709 - def needs_a_reset(self):
710 """Decides if we we need a reset. This checks 711 to see if we have a new C{logP} that exceeds 712 the old record. It keeps track of the necessary 713 paperwork. 714 """ 715 current = self.current() 716 # print 'stepper.needs_a_reset', threading.currentThread().getName() 717 with self.lock: 718 if self.last_reset is None: 719 self.last_reset = current 720 rst = False 721 else: 722 rst = current.logp_nocompute() > self.last_reset.logp_nocompute() + self.acceptable.T()*0.5 723 # rst = current.logp_nocompute() > self.last_reset.logp_nocompute() 724 if rst: 725 self.last_reset = current 726 if rst and Debug>1: 727 print '# RESET: logp=', current.logp_nocompute() 728 return rst
729 730
731 - def current(self):
732 """@return: the current position. 733 @rtype: L{position_base} 734 """ 735 with self.lock: 736 return self._current
737 738
739 - def _set_current(self, newcurrent):
740 """Memorizes the current position. 741 @raise NotGoodPosition: if newcurrent doesn't have a finite logp() value. 742 """ 743 with self.lock: 744 # logp_nocompute() will raise a NotGoodPosition exception 745 # so this essentially asserts that the current position is good. 746 newcurrent.logp_nocompute() 747 self._current = newcurrent
748
749 750 751 -class adjuster(object):
752 """The C{adjuster} class controls the step size. 753 """ 754 Z0 = 1.5 755 DZ = 0.3 756 TOL = 0.2 757 STABEXP = 1.0 758
759 - def __init__(self, F, vscale, vse=0.0, vsmax=1e30):
760 assert 0.0 < F < 1.0 761 self.lock = threading.Lock() 762 self.vscale = vscale 763 self.F = F 764 # self.np = np 765 self.state = 0 766 self.vse = vse 767 self.reset() 768 769 self.vsmax = vsmax 770 """Used when the acceptance probability is larger than 25%. 771 Large acceptance probabilities 772 can happen if the probability is everywhere about 773 equal. (E.g. a data fitting problem with almost 774 no data)"""
775
776 - def reset(self):
777 """Called when old habits should be forgotten. 778 """ 779 # print 'adjuster.reset', threading.currentThread().getName() 780 with self.lock: 781 self.z = self.Z0 782 self.since_reset = 0 783 self.ncheck = self._calc_ncheck() 784 self.blocknacc = 0 785 self.blockntry = 0
786 # print "#ADJUSTER RESET" 787
788 - def _calc_ncheck(self):
789 """This estimates when to make the next check for statistically significant 790 deviations from the correct step acceptance rate. (It's not worth checking 791 at every step, so we do it only occasionally.) 792 """ 793 assert self.z >= self.Z0 794 ss = max( -self.z/math.log(1-self.f()), -self.z/math.log(self.f()) ) 795 """C{ss} is the fewest samples where you could possibly 796 detect statistically significant deviations 797 from getting a fraction self.F of steps accepted.""" 798 assert ss > 3.0 799 # The "100" is just there because checking the step acceptance rate isn't very expensive, 800 # so we might as well guard against absurdly long check intervals. 801 return min(100, int(math.ceil(ss)))
802
803 - def f(self):
804 """This allows the desired fraction of accepted steps to 805 depend on self.vscale. 806 @requires: This method should only be called with C{self.vse != 0} 807 where self.vscale can normally be expected to be fairly close to unity, 808 and where small values of C{self.vse} indicate trouble. 809 In the L{mcmc.Bootstepper.step_boot} case this is true. 810 """ 811 # Ideally, we'd like self.f()==self.F for 812 # greatest efficiency, but that can fail badly if the acceptance 813 # probability is always smaller than self.F. One example is 814 # where this happens is if the maximum is at a corner of an 815 # allowable region, such as: 816 # log(P) = { w<0 or x<0 or y<0 or z<0 : -infinity, 817 # else: -(x+y+z+w) 818 # }. 819 # In such a case, trying to maintain the optimal step acceptance 820 # can drive vscale to zero, which is not good for the convergence 821 # rate. The function chosen here gives up some efficiency 822 # to avoid such a disaster. 823 return self.F * min(1.0, self.vscale**self.vse)
824 825
826 - def inctry(self, accepted):
827 # print 'adjuster.inctry', threading.currentThread().getName() 828 with self.lock: 829 self.since_reset += 1 830 self.blockntry += 1 831 self.blocknacc += accepted 832 833 # Only check occasionally. 834 if self.blockntry > self.ncheck: 835 self._inctry_guts()
836 837
838 - def _inctry_guts(self):
839 """Called under lock! 840 We check that the observed fraction of accepted 841 steps is consistent with a Binomial distribution. 842 If not, we try updating self.vscale. 843 """ 844 EPS = 1e-6 845 Flow = self.f() * (1.0 - self.TOL) 846 Fhi = self.f() * (1.0 + self.TOL) 847 lPH0low = self.blocknacc*math.log(Flow) + (self.blockntry-self.blocknacc)*math.log(1-Flow) 848 lPH0hi = self.blocknacc*math.log(Fhi) + (self.blockntry-self.blocknacc)*math.log(1-Fhi) 849 Phat = (self.blocknacc + EPS)/(self.blockntry + 2*EPS) 850 sigmaP = math.sqrt(self.f() * (1-self.f()) / self.blockntry) 851 lPH1 = self.blocknacc*math.log(Phat) + (self.blockntry-self.blocknacc)*math.log(1-Phat) 852 # print '#NCHECK bnacc=', self.blocknacc, self.blockntry, lPH0low, lPH0hi, lPH1, self.z 853 854 if (Phat>Fhi and lPH1-lPH0hi > self.z) or (Phat<Flow and lPH1-lPH0low > self.z): 855 # The fraction of accepted steps is inconsistent with the range 856 # from Flow to Fhi -- in other words, we're reasonably sure the acceptance 857 # rate is substantially wrong. 858 859 delta = math.log(Phat/self.f()) 860 delta = min(max(delta, -self.TOL), self.TOL) 861 self.vscale *= math.exp(delta*self.STABEXP) 862 if self.vscale > self.vsmax: 863 self.vscale = self.vsmax 864 if Debug>2: 865 print '#NCHECK step acceptance rate is %.2f vs. %.2f:' % (Phat, self.f()), 'changing vscale to', self.vscale 866 print '#NCHECK ADJ vscale=', self.vscale, self.z, self.blocknacc, self.blockntry, delta 867 self.blockntry = 0 868 self.blocknacc = 0 869 self.state = -1 870 self.ncheck = self._calc_ncheck() 871 elif Phat>Flow and Phat<Fhi and 2.0*sigmaP/self.f() < self.TOL: 872 # We're as accurate as we need to be. 873 # Therefore, we might as well restart the counters 874 # in case the process is non-stationary. 875 self.blockntry = 0 876 self.blocknacc = 0 877 self.state = 1 878 self.ncheck = self._calc_ncheck() 879 # print '#NCHECK close enough', 'phat=', Phat, 'sigmaP=', sigmaP 880 else: 881 # Doing OK. The step acceptance rate is not 882 # known to be incorrect. 883 self.z += self.DZ 884 self.state = 0 885 self.ncheck = self.blockntry + self._calc_ncheck()
886 # print "#NCHECK OK", self.ncheck, 'z=', self.z 887 888
889 - def vs(self):
890 """We stick in the factor of random.lognormvariate() 891 so that all sizes of move are possible and thus we 892 can prove that we can random-walk to any point in 893 a connected region. This makes the proof of 894 ergodicity simpler. 895 @return: a scale factor for the step size. 896 @rtype: float 897 """ 898 assert type(self.vscale)==types.FloatType 899 return random.lognormvariate(0.0, self.TOL/2.0)*self.vscale
900 901
902 - def status(self):
903 """@return: misc. status information for debugging purposes.""" 904 with self.lock: 905 tmp = (self.blocknacc, self.blockntry, self.state) 906 return tmp
907
908 909 910 911 912 913 -def start_is_list_a(start):
914 """Is the argument a sequence of numpy arrays? 915 """ 916 917 for (i, tmp) in enumerate(start): 918 if not isinstance(tmp, _Ntype): 919 if i > 0: 920 raise TypeError, "Sequence is not all the same type" 921 return False 922 return len(start) > 0
923
924 925 -def start_is_list_p(start):
926 """Is the argument a sequence of position_base objects? 927 """ 928 929 for (i, tmp) in enumerate(start): 930 if not g_implements.impl(tmp, position_base): 931 if i > 0: 932 raise TypeError, "Sequence is not all the same type" 933 return False 934 return len(start) > 0
935
936 937 938 -class NoBoot(Exception):
939 """This is used internally to signify that some particular method 940 of selecting a step has decided that it is not suitable. For instance, 941 a method that depends on an archive of previous steps might see that 942 the archive isn't long enough. 943 """
944 - def __init__(self, *s):
945 Exception.__init__(self, *s)
946
947 948 -class NotGoodPosition(ValueError):
949 """This is raised by user code (e.g. in the L{problem_definition.logP} 950 method or in L{problem_definition.fixer}) to indicate that the current 951 position is not computable or is unacceptable in some other way. 952 This is assumed to be a permanent problem: i.e. the mathematical model 953 blows up for this set of parameters. 954 """
955 - def __init__(self, *s):
956 ValueError.__init__(self, *s)
957
958 - def add_args(self, *s):
959 """ 960 Typical usage:: 961 962 try: 963 something 964 except NotGoodPosition, ex: 965 raise ex.add_args(more, args, added, here) 966 967 @note: This modifies the original object and returns it. 968 @param s: a tuple of arguments to add onto the tail of the exception's C{args} list. 969 @rtype: NotGoodPosition 970 @return: The modified exception. 971 """ 972 self.args = self.args + s 973 return self
974
975 976 977 978 -def make_list_of_positions(x, PositionClass, problem_def):
979 """Turn almost anything into a list of position_base objects. 980 You can hand it a sequence of numpy vectors 981 or a single 1-dimensional numpy vector; 982 a sequence of position_base objects 983 or a single 1-dimensional position_base object. 984 985 @precondition: This depends on PositionClass being callable as 986 PositionClass(vector_of_doubles, problem_definition). 987 """ 988 o = [] 989 if start_is_list_a(x): 990 assert len(x) > 0, "Zero length list of arrays." 991 for t in x: 992 if len(o) > 0: 993 assert t.shape == (o[0].vec().shape[0],) 994 # If the parameters are identical, 995 # we can share position structures, 996 # and reduce the number of times we need 997 # to evaluate logp(x, c). 998 if len(o)>0 and numpy.alltrue(numpy.equal(o[-1].vec(), t)): 999 # print 'SAME:', t, self.current() 1000 o.append( o[-1] ) 1001 else: 1002 # print 'DIFF:', t 1003 o.append( PositionClass(t, problem_def) ) 1004 elif start_is_list_p(x): 1005 assert len(x) > 0, "Zero length list of positions." 1006 o = [] 1007 for t in x: 1008 g_implements.check(t, PositionClass) 1009 if len(o) > 0: 1010 assert t.vec().shape[0] == (o[0].vec().shape[0],) 1011 if len(o)>0 and numpy.alltrue(numpy.equal(t.vec(), o[-1].vec())): 1012 o.append( o[-1] ) 1013 else: 1014 # print 'DIFF:', t 1015 o.append( t ) 1016 elif g_implements.impl(x, PositionClass): 1017 o = [x] 1018 elif isinstance(x, _Ntype) and len(x.shape)==1: 1019 o = [ PositionClass(x, problem_def) ] 1020 else: 1021 raise TypeError, "Cannot handle type=%s for x. Must implement %s or be a 1-dimensional numpy array." % (type(x), repr(PositionClass)) 1022 return o
1023
1024 1025 1026 -def _check_list_of_positions(x):
1027 if not isinstance(x, list): 1028 raise TypeError, "Not a List (should be a list of positions)" 1029 for (i, t) in enumerate(x): 1030 failure = g_implements.why(t, position_base) 1031 if failure is not None: 1032 raise TypeError, "x[%d] does not implement position_base: %s" % (i, failure)
1033
1034 1035 -class hashcounter_c(dict):
1036 """Keep a count of how many times we've see various items.""" 1037
1038 - def incr(self, x):
1039 try: 1040 self[x] += 1 1041 except KeyError: 1042 self[x] = 1
1043
1044 - def decr(self, x):
1045 """@raise ValueError: if you ever see something less than zero times. 1046 """ 1047 tmp = self[x] 1048 if tmp == 1: 1049 del self[x] 1050 elif tmp < 1: 1051 raise ValueError("More decrements than increments", x) 1052 else: 1053 self[x] = tmp-1
1054
1055 1056 1057 1058 1059 -class Archive(object):
1060 """This maintains a list of all the recent accepted positions.""" 1061 SUPHILL = 'hillclimb' 1062 """Always keep the list of positions sorted. 1063 This improves mimimization performance at the cost of a distorted probability distribution.""" 1064 SSAMPLE = 'sample' #: Never sort the list. Best if you really want an exact Monte Carlo distribution. 1065 SANNEAL = 'intermediate' #: Sort the list only when logp() is making substantial improvements. 1066 1067
1068 - def __init__(self, lop, np_eff, strategy=SANNEAL, maxArchSize=None, alpha=None):
1069 assert strategy in (self.SSAMPLE, self.SANNEAL, self.SUPHILL) 1070 assert len(lop) > 0 1071 self.lop = lop 1072 self.strategy = strategy 1073 self.sorted = False 1074 if self.strategy == self.SANNEAL: 1075 self.sort() 1076 self.sorted = True 1077 self.np_eff = np_eff 1078 self.since_last_rst = 0 1079 self.lock = threading.Lock() 1080 self._hashes = hashcounter_c() 1081 for p in self.lop: 1082 self._hashes.incr(p.uid()) 1083 1084 if maxArchSize is None: 1085 maxArchSize = MEMORYS_WORTH_OF_PARAMETERS // self.np_eff 1086 #: The minimum length for the archive. This is chosen to be big enough so that 1087 #: all the parallel copies probably span the full parameter space. 1088 self.min_l = self.np_eff + int(round(2*math.sqrt(self.np_eff))) + 3 1089 if maxArchSize < self.min_l: 1090 raise ValueError, "maxArchSize is too small for trustworthy operation: %d < min_l=%d (npeff=%d)" % (maxArchSize, self.min_l, self.np_eff) 1091 self.max_l = maxArchSize 1092 self.alpha = alpha
1093 1094
1095 - def distinct_count(self):
1096 """How many distinct values of the parameters are there in the archive?""" 1097 with self.lock: 1098 return len(self._hashes)
1099 1100
1101 - def prmlist(self, n):
1102 assert n > 0 1103 with self.lock: 1104 return self.lop[max(0,len(self.lop)-n):]
1105 1106
1107 - def sort(self):
1108 """Called under lock. Sort the archive into order of C{logp}.""" 1109 if self.sorted or self.strategy==self.SSAMPLE: 1110 return 1111 if Debug: 1112 die.info( '# Sorting archive' ) 1113 # This uses position.__cmp__(): 1114 self.lop.sort() 1115 self.sorted = True
1116 1117
1118 - def reset(self):
1119 """We sort the archive to speed the convergence to 1120 the best solution. After all, if you've just 1121 gotten a reset, it is likely that you're not at the 1122 bottom yet, so statistical properties of the distribution 1123 are likely to be irrelevant. 1124 """ 1125 # print 'Archive.reset', threading.currentThread().getName() 1126 with self.lock: 1127 self.sort() 1128 self.truncate(self.min_l) 1129 self.since_last_rst = 0
1130 1131
1132 - def __len__(self):
1133 with self.lock: 1134 return len(self.lop)
1135 1136
1137 - def choose(self):
1138 # print 'Archive.lock', threading.currentThread().getName() 1139 with self.lock: 1140 return random.choice( self.lop )
1141 1142
1143 - def truncate(self, desired_length):
1144 """Shortens the archive and updates the various counters and statistics. 1145 @param desired_length: Measured in terms of the number of distinct positions. 1146 @note: Must be called under lock. 1147 """ 1148 assert len(self.lop) > 0 1149 assert len(self._hashes) <= len(self.lop) 1150 assert (len(self._hashes)>0) == (len(self.lop)>0) 1151 MIN_TRUNCATE = 4 #: It's not worth the cost of truncating one at a time. Do it in clumps. 1152 1153 if len(self._hashes) > desired_length + MIN_TRUNCATE: 1154 j = 0 1155 while len(self._hashes) > desired_length: 1156 self._hashes.decr(self.lop[j].uid()) 1157 j += 1 1158 assert j <= len(self.lop) 1159 if j > 0: 1160 self.truncate_hook(self.lop[:j]) 1161 self.lop = self.lop[j:] 1162 if Debug > 1: 1163 die.info('Truncating archive from %d by %d' % (len(self.lop), j)) 1164 assert len(self._hashes) <= len(self.lop)
1165 # There is a possibility that we have truncated the current position. 1166 # This can happen when the archive is sorted, and we have just accepted a step that 1167 # has worsened log(P). I wonder if that's a problem? 1168 1169 1170 Sfac = {SUPHILL: 100, SANNEAL: 5, SSAMPLE: 2} 1171
1172 - def append(self, x, maxdups):
1173 """Adds stuff to the archive, possibly sorting the 1174 new information into place. It updates all kinds of 1175 counters and summary statistics. 1176 1177 @param x: A position to (possibly) add to the archive. 1178 @type x: L{position_base} 1179 @return: A one-letter code indicating what happened. 'f' if x 1180 is a duplicate and duplicates are forbidden. 1181 'd' if it is a duplicate and there have been 1182 too many duplicates lately. 1183 'a' otherwise -- x has been added to the archive. 1184 @rtype: str 1185 """ 1186 # print 'Archive.append', threading.currentThread().getName() 1187 F = 0.25 # Ideally, this would be BootStepper.F or even the result of adjuster.f(). 1188 with self.lock: 1189 self.since_last_rst += 1 1190 1191 assert len(self._hashes) <= len(self.lop) 1192 uid = x.uid() 1193 if self.strategy!=self.SSAMPLE and maxdups>0 and self._hashes.get(uid, 0) > maxdups: 1194 # We don't want the clutter up the archive with lots of duplicates if we're not 1195 # in sampling mode. 1196 return 'f' 1197 self._hashes.incr(uid) 1198 1199 if not self.sorted or self.strategy==self.SSAMPLE: 1200 self.lop.append(x) 1201 self.sorted = False 1202 else: 1203 # This uses position.__cmp__(): 1204 bisect.insort_right(self.lop, x) 1205 # We can't do the following check because (a) it can 1206 # fail for position_nonrepeatable, and (b) it triggers 1207 # extra evaluations of logp(), which can be expensive: 1208 # assert self.lop[-1].logp() >= self.lop[0].logp(). 1209 # 1210 # Check that we've had enough steps to have had a reasonable chance 1211 # to have detected a new maximum. 1212 if self.since_last_rst > self.Sfac[self.strategy]*self.np_eff/F: 1213 self.sorted = False 1214 if Debug: 1215 die.info( '# Archive sorting is now off' ) 1216 self.append_hook(x) 1217 if Debug > 1: 1218 die.info('Archive length=%d' % len(self.lop)) 1219 # print "self.lop=", self.lop 1220 assert len(self._hashes) <= len(self.lop) 1221 self.truncate( min(int( self.min_l + self.alpha*self.since_last_rst ), self.max_l)) 1222 return 'a'
1223 1224
1225 - def append_hook(self, x):
1226 pass
1227
1228 - def truncate_hook(self, to_be_dropped):
1229 pass
1230
1231 1232 -class ContPrmArchive(Archive):
1233 - def __init__(self, lop, np_eff, strategy=Archive.SANNEAL, maxArchSize=None, alpha=None):
1234 """Append_hook() is called for every element of the archive. 1235 That function can be replaced in a sub-class to accumulate 1236 some kind of summary. Here, it is used to keep track of parameter 1237 means and standard deviations. 1238 """ 1239 Archive.__init__(self, lop, np_eff, strategy=strategy, 1240 maxArchSize=maxArchSize, alpha=alpha) 1241 self.p0 = lop[-1].vec() 1242 self.s = numpy.zeros(self.p0.shape, numpy.float) 1243 self.ss = numpy.zeros(self.p0.shape, numpy.float) 1244 for a in lop: 1245 self.append_hook(a)
1246 1247
1248 - def append_hook(self, x):
1249 """This accumulates parameter means and standard deviations. 1250 """ 1251 tmp = x.vec() - self.p0 1252 numpy.add(self.s, tmp, self.s) 1253 numpy.add(self.ss, numpy.square(tmp), self.ss)
1254 1255
1256 - def truncate_hook(self, to_be_dropped):
1257 if len(to_be_dropped) > len(self.lop): 1258 # Take the opportunity to pick a better value of self.p0. 1259 self.p0 = self.s/(len(to_be_dropped) + len(self.lop)) 1260 self.s = numpy.zeros(self.p0.shape) 1261 self.ss = numpy.zeros(self.p0.shape) 1262 for prms in self.lop: 1263 self.append_hook(prms) 1264 else: 1265 for prms in to_be_dropped: 1266 tmp = prms.vec() - self.p0 1267 numpy.subtract(self.s, tmp, self.s) 1268 numpy.subtract(self.ss, numpy.square(tmp), self.ss)
1269
1270 - def variance(self):
1271 with self.lock: 1272 n = len(self.lop) 1273 core = self.ss - self.s*self.s/n 1274 if not numpy.alltrue(numpy.greater(core, 0.0)): 1275 self.ss -= numpy.minimum(0.0, core) 1276 if Debug > 0: 1277 die.warn('Zero stdev in archive.stdev for p=%s' 1278 % ','.join( ['%d' % q for q 1279 in numpy.nonzero(numpy.less_equal(core, 0.0))[0] 1280 ]) 1281 ) 1282 return numpy.ones(self.s.shape, numpy.float) 1283 assert n > 1 1284 assert numpy.alltrue(numpy.greater(core, 0.0)) 1285 return core/(n-1)
1286
1287 1288 -class BootStepper(stepper):
1289 """The BootStepper class is the primary interface. It implements a Bootstrap 1290 Markov Chain Monte Carlo stepper. The class instance stores the necessary 1291 state information, and each call to L{step}() takes another step. 1292 """ 1293 1294 F = 0.234 1295 """F is the targeted step acceptance rate. This is from G. O. Roberts, 1296 A. Gelman, and W. Gilks (1997) "Weak convergence and optimal 1297 scaling of random walk Metropolis algorithm." Ann. Applied. 1298 Probability, 7, p. 110-120 and also from 1299 G. O. Roberts and J. S. Rosenthal (2001) "Optimal scaling of 1300 various Metropolis-Hastings algorithms." Statistical Sci. 1301 16, pp. 351-367.""" 1302 1303 alpha = 0.1 #: How rapidly should one expand the archive after a reset? 1304 1305 PBootLim = 0.9 1306 """PBootLim Limits the probability of taking a bootstrap step. 1307 This, if the optimization collapses into a subspace, some other kind of step 1308 will eventually get it out.""" 1309 1310 # Control of when to sort steps and when to run with honest Monte-Carlo: 1311 SSAMPLE = ContPrmArchive.SSAMPLE #: Sampling mode 1312 SUPHILL = ContPrmArchive.SUPHILL #: Go straight uphill 1313 SANNEAL = ContPrmArchive.SANNEAL #: Simulated annealing 1314 SSAUTO = SANNEAL 1315 SSNEVER = SSAMPLE 1316 SSLOW = SANNEAL 1317 SSALWAYS = SUPHILL 1318
1319 - def __init__(self, lop, v, strategy=SANNEAL, maxArchSize=None, parallelSizeDiv=1):
1320 """@param maxArchSize: How many position vectors can be stored. This is 1321 normally used to (loosely) enforce a memory limitation for large 1322 jobs. 1323 @param parallelSizeDiv: For use when there are several cooperating MCMC 1324 processes that share data. When >1, this allows each process 1325 to have smaller stored lists. Normally, parallelSizeDiv is 1326 between 1 and the number of cooperating processes. 1327 @type lop: C{list} of a class derived from L{position_base}. 1328 @param lop: These are a list of starting postions that should be evaluated 1329 first and used as the basis for further exploration. 1330 @param v: A covariance matrix for non-bootstrap steps. 1331 Mostly, the system takes bootstrap steps that are adaptively derived from 1332 previous steps. However, to generate the list of previous steps, 1333 it needs a way to generate steps ab initio. So, C{v} is used to 1334 get things going. C{v} is also used occasionally thereafter, just to 1335 make sure the stepper doesn't get trapped in some subspace. 1336 @type strategy: One of L{SSAMPLE}, L{SUPHILL}, or L{SANNEAL}. 1337 """ 1338 stepper.__init__(self) 1339 _check_list_of_positions(lop) 1340 stepper._set_current(self, lop[-1]) 1341 self.np = lop[0].vec().shape[0] #: The number of parameters: 1342 self.np_eff = (self.np+parallelSizeDiv-1)//parallelSizeDiv 1343 """In a multiprocessor situation, np_eff tells you how much data do you 1344 need to store locally, so that the overall group of processors 1345 stores enough variety of data.""" 1346 1347 self.archive = ContPrmArchive(lop, self.np_eff, strategy=strategy, 1348 maxArchSize=maxArchSize, alpha=self.alpha) 1349 self.maxdups = int(round(4.0/self.F)) 1350 1351 if not self.np > 0: 1352 raise ValueError, "Np=%d; it must be positive." % self.np 1353 if v.shape != (self.np, self.np): 1354 raise ValueError, "v must be square, with side equal to the number of parameters. Vs=%s, np=%d." % (str(v.shape), self.np) 1355 1356 self.v = numpy.array(v, numpy.float, copy=True) 1357 self.V = MVN.multivariate_normal(numpy.zeros(v.shape[0], numpy.float), 1358 v) 1359 self.aB = adjuster(self.F, vscale=0.5, vse=0.5, vsmax=1.3) 1360 self.aV = adjuster(self.F, vscale=1.0) 1361 self.steptype = None
1362 1363
1364 - def step(self):
1365 stepper.step(self) 1366 Wboot = max(self.archive.distinct_count()-1, 0) 1367 WV = max(self.np_eff, 0.1*Wboot) 1368 Wprobe = (Wboot+WV) * self.F 1369 W = float(WV + Wprobe + Wboot) 1370 Pboot = Wboot / W 1371 PV = Pboot + WV / W 1372 again = True 1373 while again: 1374 P = random.random() 1375 try: 1376 if P < Pboot: 1377 accepted = self.step_boot() 1378 elif P < PV: 1379 accepted = self.stepV() 1380 else: 1381 accepted = self.step_probe() 1382 except NoBoot, x: 1383 if Debug > 0: 1384 die.info('NoBoot: %s' % str(x)) 1385 again = True 1386 else: 1387 again = False 1388 return accepted
1389 1390 1391 1392 1393 # def prmlist(self): 1394 # return self.archive.prmlist() 1395 1396 # def last(self): 1397 # return self.archive.last() 1398 1399
1400 - def status(self):
1401 with self.lock: 1402 o = ['a0vs=%g' % self.aB.vscale, 1403 'a0acc=%g; a0try=%g; a0state=%d' % self.aB.status(), 1404 'a1vs=%g' % self.aV.vscale, 1405 'a1acc=%g; a1try=%g; a1state=%d' % self.aV.status(), 1406 'nboot=%d' % len(self.archive), 1407 'logP=%g' % self.current().logp_nocompute(), 1408 'type=%s' % self.steptype 1409 ] 1410 return '; '.join(o) + ';'
1411 1412
1413 - def stepV(self):
1414 self.steptype = 'stepV' 1415 vs1 = self.aV.vs() 1416 move = self.V.sample() * vs1 1417 if hasattr(self.archive, 'variance') and self.archive.distinct_count() >= min(5, self.np): 1418 move *= (self.archive.variance()/self.v.diagonal())**0.25 1419 1420 # print "self.current()=", self.current() 1421 try: 1422 tmp = self.current().new(move) 1423 delta = tmp.logp() - self.current().logp() 1424 except ValueError, x: 1425 die.warn('Cache trouble!: %s' % str(x)) 1426 die.info('current= %s' % str(self.current())) 1427 raise 1428 except NotGoodPosition, x: 1429 if Debug>2: 1430 die.warn('StepV: %s' % str(x)) 1431 # Should I call self.aV.inctry(0) ? 1432 return 0 1433 if self.acceptable(delta): 1434 accepted = 1 1435 if Debug>2: 1436 die.info('StepV: Accepted logp=%g' % tmp.logp_nocompute()) 1437 # We cannot use self.lop[-1] for current because lop is 1438 # sometimes sorted. When it is, the most recently added 1439 # position could end up anywhere in the list. Thus, we keep 1440 # a separate self._current. 1441 self._set_current(tmp) 1442 self.aV.inctry(accepted) 1443 else: 1444 self._set_failed( tmp ) 1445 accepted = 0 1446 if Debug>2: 1447 die.info('StepV: Rejected logp=%g vs. %g, T=%g' 1448 % (tmp.logp_nocompute(), self.current().logp_nocompute(), self.acceptable.T())) 1449 self.aV.inctry(accepted) 1450 self._set_current(self.current()) 1451 return accepted
1452 1453
1454 - def step_boot(self):
1455 self.steptype = 'step_boot' 1456 vs0 = self.aB.vs() 1457 assert vs0 < 10.0, "Vs0 too big!=%s" % str(vs0) 1458 if Debug>3: 1459 die.note('VsB', vs0) 1460 1461 if len(self.archive) <= 2: 1462 # print 'NoBoot', len(self.archive) 1463 raise NoBoot, "Cannot find bootstrap points" 1464 p1 = self.archive.choose() 1465 p2 = self.archive.choose() 1466 # if p1 is p2: 1467 if p1.uid() == p2.uid(): 1468 # print 'NoBoot dup' 1469 raise NoBoot, "Bootstrap: Duplicate uid" 1470 1471 # die.note('p1.prms', p1.prms()) 1472 # die.note('p2.prms', p2.prms()) 1473 move = (p1.vec() - p2.vec()) * vs0 1474 # print '# move=', p1.prms(), p2.prms(), vs0, move 1475 1476 try: 1477 tmp = self.current().new(move) 1478 delta = tmp.logp() - self.current().logp() 1479 except ValueError, x: 1480 die.warn('Cache trouble!: %s' % str(x)) 1481 die.info('current= %s' % str(self.current())) 1482 raise 1483 except NotGoodPosition: 1484 if Debug>2: 1485 die.warn('StepBoot: Nogood position') 1486 return 0 1487 if self.acceptable(delta): 1488 accepted = 1 1489 if Debug>2: 1490 die.info('StepBoot: Accepted logp=%g' % tmp.logp_nocompute()) 1491 self._set_current(tmp) 1492 self.aB.inctry(accepted) 1493 else: 1494 self._set_failed( tmp ) 1495 accepted = 0 1496 if Debug>2: 1497 die.info('StepBoot: Rejected logp=%g vs. %g, T=%g' 1498 % (tmp.logp_nocompute(), self.current().logp_nocompute(), self.acceptable.T())) 1499 self.aB.inctry(accepted) 1500 self._set_current(self.current()) 1501 return accepted
1502 1503
1504 - def step_probe(self):
1505 self.steptype = 'step_probe' 1506 cur = self.current() 1507 try: 1508 tmp = self.acceptable.probe(cur, self) 1509 if tmp is None: 1510 raise NoBoot, "No probe wanted." 1511 delta = tmp.logp() - cur.logp() 1512 except NotGoodPosition, ex: 1513 if Debug>2: 1514 die.warn('step_probe: %s' % str(ex)) 1515 return 0 1516 except ValueError, ex: 1517 die.warn('Cache trouble!: %s' % str(ex)) 1518 die.info('current= %s' % str(cur)) 1519 raise 1520 1521 if self.acceptable(delta): 1522 accepted = 1 1523 if Debug>2: 1524 die.info('step_probe: Accepted logp=%g' % tmp.logp_nocompute()) 1525 # We cannot use self.lop[-1] for current because lop is 1526 # sometimes sorted. When it is, the most recently added 1527 # position could end up anywhere in the list. Thus, we keep 1528 # a separate self._current. 1529 self._set_current(tmp) 1530 else: 1531 self._set_failed( tmp ) 1532 accepted = 0 1533 if Debug>2: 1534 die.info('step_probe: Rejected logp=%g vs. %g, T=%g' 1535 % (tmp.logp_nocompute(), cur.logp_nocompute(), self.acceptable.T())) 1536 self._set_current(self.current()) 1537 return accepted
1538 1539 1540
1541 - def _set_current(self, newcurrent):
1542 """@raise NotGoodPosition: if newcurrent doesn't have a finite logp() value. 1543 """ 1544 g_implements.check(newcurrent, position_base) 1545 # print 'bootstepper.set_current', threading.currentThread().getName() 1546 stepper._set_current(self, newcurrent) 1547 out = [] 1548 with self.lock: 1549 maxdups = self.maxdups if self.since_last_rst < self.np_eff else -1 1550 """After a reset, the optimizer may not have good step directions, 1551 so there may be many failed steps. Thus, you don't want to 1552 fill up the archive with lots of duplicates of the current position 1553 that are basically meaningless. Thus, we forbid duplicates for 1554 a while.""" 1555 1556 q = self.archive.append(newcurrent, maxdups=maxdups) 1557 out.append(q) 1558 if self.needs_a_reset(): 1559 # Shorten the archive -- if we've found a new best 1560 # point, that makes it likely that we have not 1561 # yet converged. Consequently, since were 1562 # probably not near stationarity, the old history 1563 # is probably useless. 1564 self.reset_adjusters() 1565 self.archive.reset() 1566 self.reset() 1567 out.append('r') 1568 return ''.join(out)
1569 1570
1571 - def _set_failed(self, f):
1572 """We remember the most recent failure for debugging purposes. 1573 But, just because a step failed doesn't mean it is awful. If the archive is 1574 sorted (i.e. we are in optimization mode, not sampling mode) and 1575 if the failed step is better than most of the archive, then remember it. 1576 """ 1577 self.last_failed = f 1578 with self.lock: 1579 refPos = len(self.archive)//4 1580 try: 1581 if (f is not None 1582 and self.archive.sorted 1583 and f.logp_nocompute()>self.archive.lop[refPos].logp_nocompute()): 1584 self.archive.append(f, 1) 1585 except NotGoodPosition: 1586 pass
1587 1588
1589 - def reset_adjusters(self):
1590 self.aB.reset() 1591 self.aV.reset()
1592 1593
1594 - def ergodic(self):
1595 """A crude measure of how ergodic the MCMC is. 1596 @return: The inverse of how many steps it takes to cross the 1597 minimum in the slowest direction. Or zero, if it is too 1598 soon after some major violation of the MCMC assumptions. 1599 """ 1600 if self.since_last_rst*self.aB.f() < 3*self.np_eff: 1601 # Presumably, we are still stabilizing. The factor of 3 1602 # is there to make sure that we have stopped sorting the archive. 1603 # (See Archive.sorted}. 1604 return 0.0 1605 if self.aB.state < 0: 1606 # The step size is known to be mis-adjusted, 1607 # so you cannot use it to compute a measure 1608 # of the ergodicity. 1609 # print '# state<0 in ergodic', self.aB.state 1610 return 0.0 1611 1612 # This is calibrated from the equilibrium vscale from long runs 1613 # in 1-50 dimensional Gaussian problems. 1614 1615 vsbar = 1.7/math.sqrt(self.np) # Should this be np_eff ? 1616 vs = self.aB.vscale 1617 # print 'ergodic: vs=', vs, "erg=", min(1.0, vs/vsbar)**2 1618 return self.F * min(1.0, vs/vsbar) ** 2
1619 1620
1621 - def set_strategy(self, ss):
1622 tmp = self.archive.strategy 1623 self.archive.strategy = ss 1624 return tmp
1625 1626 set_sort_strategy = set_strategy
1627
1628 1629 -def bootstepper(logp, x, v, c=None, strategy=BootStepper.SANNEAL, fixer=None, repeatable=True):
1630 """This is (essentially) another interface to the class constructor. 1631 It's really there for backwards compatibility. 1632 """ 1633 pd = problem_definition_F(logp_fcn=logp, c=c, fixer=fixer) 1634 position_constructor = [position_nonrepeatable, position_repeatable][repeatable] 1635 return BootStepper(make_list_of_positions(x, position_constructor, pd), v, strategy=strategy)
1636
1637 # boot2stepper = bootstepper 1638 1639 -def _logp1(x, c):
1640 # print "x=", x, "c=", c 1641 # print "type=", type(x), type(c) 1642 return -numpy.sum(x*x*c*c)
1643
1644 1645 -def _logp2(x, c):
1646 r = numpy.identity(x.shape[0], numpy.float) 1647 r[0,0] = 0.707107 1648 r[0,1] = 0.707107 1649 r[1,0] = -0.707107 1650 r[1,1] = 0.707107 1651 xt = numpy.dot(x, r) 1652 return _logp1(xt, c)
1653
1654 1655 -def test_(stepper):
1656 import dictops 1657 start = numpy.array([1.0, 2.0, 2, 9, 1]) 1658 c = numpy.array([100.0, 30.0, 10.0, 3.0, 0.1]) 1659 V = numpy.identity(len(c), numpy.float) 1660 # V = numpy.array([[ 20.69626808, 20.6904984 ], [ 20.6904984, 20.69477235]]) 1661 1662 x = stepper(_logp1, start, V, c=c) 1663 v = numpy.zeros((x.np, x.np)) 1664 n_per_steptype = dictops.dict_of_averages() 1665 for i in range(1000): 1666 accepted = x.step() 1667 n_per_steptype.add(x.steptype, accepted) 1668 lpsum = 0.0 1669 lps2 = 0.0 1670 na = 0 1671 psum = numpy.zeros((x.np,)) 1672 N = 30000 1673 nreset = 0 1674 nsorted = 0 1675 rid = x.reset_id() 1676 for i in range(N): 1677 accepted = x.step() 1678 na += accepted 1679 n_per_steptype.add(x.steptype, accepted) 1680 nsorted += x.archive.sorted 1681 if x.reset_id() != rid: 1682 rid = x.reset_id() 1683 nreset += 1 1684 lp = x.current().logp() 1685 lpsum += lp 1686 lps2 += lp**2 1687 p = x.current().vec() 1688 numpy.add(psum, p, psum) 1689 v += numpy.outer(p, p) 1690 for steptype in n_per_steptype.keys(): 1691 print 'Step %s has %.0f successes out of %.0f trials: %.2f' % ( 1692 steptype, n_per_steptype.get_both(steptype)[0], 1693 n_per_steptype.get_both(steptype)[1], 1694 n_per_steptype.get_avg(steptype) 1695 ) 1696 # assert N*x.PBootLim**1.5 < nboot < N*x.PBootLim**0.7 1697 assert nsorted < N//30 1698 assert N//8 < na < N//2 1699 assert nreset < 3 1700 lpsum /= N 1701 assert abs(lpsum+0.5*x.np) < 0.15, "Lpsum=%g, expected value=%g" % (lpsum, -0.5*x.np) 1702 lps2 /= N-1 1703 lpvar = lps2 - lpsum**2 1704 assert abs(lpvar-0.5*x.np) < 1.0 1705 numpy.divide(psum, N, psum) 1706 assert numpy.alltrue(numpy.less(numpy.absolute(psum), 6.0/numpy.sqrt(c))) 1707 numpy.divide(v, N-1, v) 1708 for i in range(x.np): 1709 for j in range(x.np): 1710 if i == j: 1711 # print '2*v[ii]*c=', 2*v[i,j]*c[i]**2 1712 assert abs(math.log(2*v[i,j]*c[i]*c[j])) < 0.1 1713 else: 1714 assert abs(v[i,j]) < 20/(c[i]*c[j])
1715
1716 1717 1718 -def diag_variance(start):
1719 """Hand this a list of vectors and it will compute 1720 the variance of each component, then return a diagonal 1721 covariance matrix. 1722 """ 1723 tmp = gpkmisc.vec_variance(start) 1724 if not numpy.alltrue(numpy.greater(tmp, 0.0)): 1725 raise ValueError, ("Zero variance for components %s" 1726 % ','.join(['%d'%q for q in 1727 numpy.nonzero(1-numpy.greater(tmp, 0.0))[0] 1728 ] 1729 ) 1730 ) 1731 return gpkmisc.make_diag(tmp)
1732
1733 1734 -def test():
1735 test_(stepper=bootstepper)
1736 1737 1738 if __name__ == '__main__': 1739 test() 1740