Source code for marxs.math.random

# Licensed under GPL version 3 - see LICENSE.rst
import numpy as np

[docs] class RandomArbitraryPdf(object): '''Take random draw from an arbitrary (and arbitrarily binned) pdf. The PDF is approximated as piecewice constant. The object is initialized with a parameters that describe the PDF, the object can then be called with the number of random samples that are required. Parameters ---------- x : np.array **Upper** bin edge for input bins pdf : np.array Value of the pdf for each bin. ``pdf[0]`` is ignored, since the lower edge of that bin is undefined. randomize_in_bin : bool If ``True`` randomize the return value over each bin. If ``False`` the return value will be exactly the **upper** bin edge. sort : bool When calculating the cdf from a pdf that covers a large dynamical range, round-off errors may occur. If ``True`` this sorts the input bins in size (keeping an index for reverse the operation later) to avoid numerical errors. Examples -------- First, and example with two bins (1-2 and 2-3). The probability density for each bin is 1 and 6, respectively (the 0.2 is ignored, because the range XXX - 1 is not a valid bin since the lower bound is undefined). >>> import numpy as np >>> # Set seed so this example returns exact same numbers every time >>> np.random.seed(0) >>> a = RandomArbitraryPdf(np.array([1,2,3]), np.array([0.2,1,6])) >>> a(10) array([2.79172504, 2.52889492, 2.56804456, 2.92559664, 2.07103606, 2.0871293 , 2.0202184 , 2.83261985, 2.77815675, 2.87001215]) As you can see, in this case the chance to draw from the 1-2 interval is only 1/7 and it did not happen this time. If, instead, we want to have exact (upper) bin edges returned it looks like this: >>> a = RandomArbitraryPdf(np.array([1,2,3]), np.array([0,1,6]), randomize_in_bin=False) >>> a(10) array([3, 3, 3, 3, 2, 3, 3, 3, 3, 3]) As expected, most numbers are 3, with a 2 mixed in. References ---------- https://en.wikipedia.org/wiki/Pseudorandom_number_generator#Non-uniform_generators http://stackoverflow.com/questions/21100716/ ''' def __init__(self, x, pdf, randomize_in_bin=True, sort=True): if not len(x) == len(pdf): raise ValueError('x and pdf must have same number of elements.') if not np.all(np.array(pdf) >= 0): raise ValueError('pdf cannot have negative elements.') self.x = np.asarray(x) self.bin_width = np.hstack(([0], np.diff(x))) if not np.all(self.bin_width >=0): raise ValueError('x must be input in increasing order.') pdf = np.asarray(pdf) * self.bin_width self.sort = sort self.randomize_in_bin = randomize_in_bin # sort the pdf - otherwise bins with small numbers might be lost to round-off errors # idea is from http://stackoverflow.com/questions/21100716/ if self.sort: self.sortindex = np.argsort(pdf) self.pdf = pdf[self.sortindex] else: self.pdf = pdf # cumulative distribution function self.cdf = np.cumsum(self.pdf)
[docs] def __call__(self, N): """Draw from the distribution function. See docstring of class.""" #pick numbers which are uniformly random over the cumulative distribution function choice = np.random.uniform(high=self.cdf[-1], size=N) # Now here is the difficult and comparatively expensive part: # We need a reverse lookup to find the bin in the cdf so that we an use it # to map this back to the x values of the pdf # np.searchsorted is a binary search with O(log n). index = np.searchsorted(self.cdf, choice) #if necessary, map the indices back to their original ordering if self.sort: index = self.sortindex[index] if self.randomize_in_bin: return self.x[index - 1] + self.bin_width[index] * np.random.rand(N) else: return self.x[index]