Package gmisclib :: Module weighted_percentile
[frames] | no frames]

Source Code for Module gmisclib.weighted_percentile

  1  """This computes order statistics on data with weights. 
  2  """ 
  3   
  4  import numpy 
  5   
  6   
7 -def wp(data, wt, percentiles):
8 """Compute weighted percentiles. 9 If the weights are equal, this is the same as normal percentiles. 10 Elements of the C{data} and C{wt} arrays correspond to 11 each other and must have equal length (unless C{wt} is C{None}). 12 13 @param data: The data. 14 @type data: A L{numpy.ndarray} array or a C{list} of numbers. 15 @param wt: How important is a given piece of data. 16 @type wt: C{None} or a L{numpy.ndarray} array or a C{list} of numbers. 17 All the weights must be non-negative and the sum must be 18 greater than zero. 19 @param percentiles: what percentiles to use. (Not really percentiles, 20 as the range is 0-1 rather than 0-100.) 21 @type percentiles: a C{list} of numbers between 0 and 1. 22 @rtype: [ C{float}, ... ] 23 @return: the weighted percentiles of the data. 24 """ 25 assert numpy.greater_equal(percentiles, 0.0).all(), "Percentiles less than zero" 26 assert numpy.less_equal(percentiles, 1.0).all(), "Percentiles greater than one" 27 data = numpy.asarray(data) 28 assert len(data.shape) == 1 29 if wt is None: 30 wt = numpy.ones(data.shape, numpy.float) 31 else: 32 wt = numpy.asarray(wt, numpy.float) 33 assert wt.shape == data.shape 34 assert numpy.greater_equal(wt, 0.0).all(), "Not all weights are non-negative." 35 assert len(wt.shape) == 1 36 n = data.shape[0] 37 assert n > 0 38 i = numpy.argsort(data) 39 sd = numpy.take(data, i, axis=0) 40 sw = numpy.take(wt, i, axis=0) 41 aw = numpy.add.accumulate(sw) 42 if not aw[-1] > 0: 43 raise ValueError, "Nonpositive weight sum" 44 w = (aw-0.5*sw)/aw[-1] 45 spots = numpy.searchsorted(w, percentiles) 46 o = [] 47 for (s, p) in zip(spots, percentiles): 48 if s == 0: 49 o.append(sd[0]) 50 elif s == n: 51 o.append(sd[n-1]) 52 else: 53 f1 = (w[s] - p)/(w[s] - w[s-1]) 54 f2 = (p - w[s-1])/(w[s] - w[s-1]) 55 assert f1>=0 and f2>=0 and f1<=1 and f2<=1 56 assert abs(f1+f2-1.0) < 1e-6 57 o.append(sd[s-1]*f1 + sd[s]*f2) 58 return o
59 60 61
62 -def wtd_median(data, wt):
63 """The weighted median is the point where half the weight is above 64 and half the weight is below. If the weights are equal, this is the 65 same as the median. Elements of the C{data} and C{wt} arrays correspond to 66 each other and must have equal length (unless C{wt} is C{None}). 67 68 @param data: The data. 69 @type data: A L{numpy.ndarray} array or a C{list} of numbers. 70 @param wt: How important is a given piece of data. 71 @type wt: C{None} or a L{numpy.ndarray} array or a C{list} of numbers. 72 All the weights must be non-negative and the sum must be 73 greater than zero. 74 @rtype: C{float} 75 @return: the weighted median of the data. 76 """ 77 spots = wp(data, wt, [0.5]) 78 assert len(spots)==1 79 return spots[0]
80 81
82 -def wtd_median_across(list_of_vectors, wt):
83 """Takes a weighted component-by-component median of a sequence of vectors. 84 @param list_of_vectors: the data to be combined 85 @type list_of_vectors: any sequence of lists or numpy.ndarray. 86 All the inside lists must be of the same length. 87 @type wt: a vector of weights (one weight for each input vector) or None. 88 @param wt: sequence of numbers or None 89 @return: the median vector. 90 @rtype: C{numpy.ndarray} 91 """ 92 lov = list(list_of_vectors) 93 n = len(lov[0]) 94 for v in lov: 95 if len(v) != n: 96 for (i,vv) in enumerate(lov): 97 if len(vv) != n: 98 raise ValueError, "Vector lengths don't match: %d[0] and %d[%d]" % (n, len(vv), i) 99 m = len(lov) 100 tmp = numpy.zeros((m,)) 101 rv = numpy.zeros((n,)) 102 for i in range(n): 103 for (j,v) in enumerate(lov): 104 tmp[j] = v[i] 105 rv[i] = wtd_median(tmp, wt) 106 return rv
107 108 109
110 -def test_wp():
111 assert numpy.allclose(wp([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], None, 112 [0.0, 1.0, 0.5, 0.51, 0.49, 0.01, 0.99]), 113 [1.0, 10.0, 5.5, 5.6, 5.4, 1.0, 10.0], 0.0001) 114 assert numpy.allclose(wp([0, 1, 2, 3, 4], [0.1, 1.9, 1.9, 0.1, 1], 115 [0.0, 1.0, 0.01, 0.02, 0.99]), 116 [0.0, 4.0, 0.0, 0.05, 4.0], 0.0001)
117 118
119 -def test_median():
120 d = [1,1,1,1,2,2,2,2] 121 w = [1,1,1,1,2,1,1,1] 122 assert 1.0 <= wtd_median(d,w) <= 2.0 123 d = [1.0,1,1,1, 2,2,2,2] 124 w = [1,1.1,2,1, 2,1,1,1] 125 assert 1.0 <= wtd_median(d,w) <= 2.0 126 d = [1,1,1,1,2,3,3,3,3.0] 127 w = [1,1,1,1,1.0,1,1,1,1] 128 assert abs(wtd_median(d,w)-2.0) < 0.001 129 d = [1,1,1,1,2,3,3,3,3.0] 130 w = [1,1,1.1,1,1.0,1,1,1,1] 131 assert 1.0 <= wtd_median(d,w) <= 2.0 132 d = [1,1,1,1,2,3,3,3,3.0] 133 w = [1.3,1.3,1.3,1.3,1.0,1,1,1,1] 134 assert 1.0 <= wtd_median(d,w) <= 2.0 135 d = [1,1,1,1,2,3,4,4.0] 136 w = [1.,1.,1.,1.,0.0,1,2,2] 137 assert abs(wtd_median(d,w)-3.0) < 0.001
138 139
140 -def test():
141 test_wp() 142 test_median()
143 144 if __name__ == '__main__': 145 test() 146