1 """This computes order statistics on data with weights.
2 """
3
4 import numpy
5
6
7 -def wp(data, wt, percentiles):
8 """Compute weighted percentiles.
9 If the weights are equal, this is the same as normal percentiles.
10 Elements of the C{data} and C{wt} arrays correspond to
11 each other and must have equal length (unless C{wt} is C{None}).
12
13 @param data: The data.
14 @type data: A L{numpy.ndarray} array or a C{list} of numbers.
15 @param wt: How important is a given piece of data.
16 @type wt: C{None} or a L{numpy.ndarray} array or a C{list} of numbers.
17 All the weights must be non-negative and the sum must be
18 greater than zero.
19 @param percentiles: what percentiles to use. (Not really percentiles,
20 as the range is 0-1 rather than 0-100.)
21 @type percentiles: a C{list} of numbers between 0 and 1.
22 @rtype: [ C{float}, ... ]
23 @return: the weighted percentiles of the data.
24 """
25 assert numpy.greater_equal(percentiles, 0.0).all(), "Percentiles less than zero"
26 assert numpy.less_equal(percentiles, 1.0).all(), "Percentiles greater than one"
27 data = numpy.asarray(data)
28 assert len(data.shape) == 1
29 if wt is None:
30 wt = numpy.ones(data.shape, numpy.float)
31 else:
32 wt = numpy.asarray(wt, numpy.float)
33 assert wt.shape == data.shape
34 assert numpy.greater_equal(wt, 0.0).all(), "Not all weights are non-negative."
35 assert len(wt.shape) == 1
36 n = data.shape[0]
37 assert n > 0
38 i = numpy.argsort(data)
39 sd = numpy.take(data, i, axis=0)
40 sw = numpy.take(wt, i, axis=0)
41 aw = numpy.add.accumulate(sw)
42 if not aw[-1] > 0:
43 raise ValueError, "Nonpositive weight sum"
44 w = (aw-0.5*sw)/aw[-1]
45 spots = numpy.searchsorted(w, percentiles)
46 o = []
47 for (s, p) in zip(spots, percentiles):
48 if s == 0:
49 o.append(sd[0])
50 elif s == n:
51 o.append(sd[n-1])
52 else:
53 f1 = (w[s] - p)/(w[s] - w[s-1])
54 f2 = (p - w[s-1])/(w[s] - w[s-1])
55 assert f1>=0 and f2>=0 and f1<=1 and f2<=1
56 assert abs(f1+f2-1.0) < 1e-6
57 o.append(sd[s-1]*f1 + sd[s]*f2)
58 return o
59
60
61
80
81
107
108
109
111 assert numpy.allclose(wp([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], None,
112 [0.0, 1.0, 0.5, 0.51, 0.49, 0.01, 0.99]),
113 [1.0, 10.0, 5.5, 5.6, 5.4, 1.0, 10.0], 0.0001)
114 assert numpy.allclose(wp([0, 1, 2, 3, 4], [0.1, 1.9, 1.9, 0.1, 1],
115 [0.0, 1.0, 0.01, 0.02, 0.99]),
116 [0.0, 4.0, 0.0, 0.05, 4.0], 0.0001)
117
118
138
139
143
144 if __name__ == '__main__':
145 test()
146