>>> h = Histogram()
"""
+ def __init__(self):
+ self.headings = None
+
def calculate_bin_edges(self, data, bin_width):
"""
>>> h = Histogram()
All bins should be of equal width (so we can calculate which
bin a data point belongs to).
-
- `data` should be a numpy array.
"""
- self.headings = None
- self.bin_edges = bin_edges
+ data = numpy.array(data)
+ self.bin_edges = numpy.array(bin_edges)
bin_width = self.bin_edges[1] - self.bin_edges[0]
bin_is = numpy.floor((data - self.bin_edges[0])/bin_width)
- self.counts = []
- for i in range(len(self.bin_edges)-1):
- self.counts.append(sum(bin_is == i).sum())
+ self.counts = numpy.zeros((len(self.bin_edges)-1,), dtype=numpy.int)
+ for i in range(len(self.counts)):
+ self.counts[i] = (bin_is == i).sum()
+ self.counts = numpy.array(self.counts)
self.total = float(len(data)) # some data might be outside the bins
self.mean = data.mean()
self.std_dev = data.std()
>>> h.counts
[10.0, 40.0, 5.0]
>>> h.bin_edges # doctest: +ELLIPSIS
- [1.5e-10, 2.000...e-10, 2.500...e-10, 3e-10]
+ [1.5e-10, 2...e-10, 2.5...e-10, 3e-10]
>>> h.probabilities # doctest: +ELLIPSIS
[0.181..., 0.727..., 0.0909...]
"""
return abs(other.std_dev - self.std_dev)
def chi_squared_residual(self, other):
    """
    Return the chi-squared residual between this histogram and `other`.

    The result is the sum over paired bins of
    ``(p_self - p_other)**2 / p_other``, where the ``p`` values are
    taken from each histogram's `probabilities`.

    Both histograms must share the same `bin_edges`; this is checked
    with an elementwise comparison, so `bin_edges` is expected to be a
    numpy array (a plain ``==`` between arrays cannot be used directly
    in an `assert`, because its truth value is ambiguous).

    Raises `AssertionError` if the bin edges differ.
    """
    assert (self.bin_edges == other.bin_edges).all()
    # NOTE(review): a zero probability in `other` makes the divisor
    # zero (ZeroDivisionError for Python floats) -- confirm whether
    # empty bins can occur in the reference histogram.
    return sum(
        (prob_self - prob_other)**2 / prob_other
        for prob_self, prob_other in zip(self.probabilities,
                                         other.probabilities))
def jensen_shannon_residual(self, other):
- assert self.bin_edges == other.bin_edges
+ assert (self.bin_edges == other.bin_edges).all()
def d_KL_term(p,q):
"""
Kullback-Leibler divergence for a single bin, with the