Source code for marxs.math.random

# Licensed under GPL version 3 - see LICENSE.rst
import numpy as np


[docs]
class RandomArbitraryPdf(object):
    '''Take random draw from an arbitrary (and arbitrarily binned) pdf.

    The PDF is approximated as piecewice constant.
    The object is initialized with a parameters that describe the PDF, the object can
    then be called with the number of random samples that are required.

    Parameters
    ----------
    x : np.array
        **Upper** bin edge for input bins
    pdf : np.array
        Value of the pdf for each bin. ``pdf[0]`` is ignored, since the lower edge
        of that bin is undefined.
    randomize_in_bin : bool
        If ``True`` randomize the return value over each bin. If ``False`` the return
        value will be exactly the **upper** bin edge.
    sort : bool
        When calculating the cdf from a pdf that covers a large dynamical range,
        round-off errors may occur. If ``True`` this sorts the input bins in size
        (keeping an index for reverse the operation later) to avoid numerical errors.

    Examples
    --------
    First, and example with two bins (1-2 and 2-3). The probability density for each
    bin is 1 and 6, respectively (the 0.2 is ignored, because the range XXX - 1 is not
    a valid bin since the lower bound is undefined).

    >>> import numpy as np
    >>> # Set seed so this example returns exact same numbers every time
    >>> np.random.seed(0)
    >>> a = RandomArbitraryPdf(np.array([1,2,3]), np.array([0.2,1,6]))
    >>> a(10)
    array([2.79172504, 2.52889492, 2.56804456, 2.92559664, 2.07103606,
           2.0871293 , 2.0202184 , 2.83261985, 2.77815675, 2.87001215])

    As you can see, in this case the chance to draw from the 1-2 interval is only
    1/7 and it did not happen this time.

    If, instead, we want to have exact (upper) bin edges returned it looks like this:
    >>> a = RandomArbitraryPdf(np.array([1,2,3]), np.array([0,1,6]), randomize_in_bin=False)
    >>> a(10)
    array([3, 3, 3, 3, 2, 3, 3, 3, 3, 3])

    As expected, most numbers are 3, with a 2 mixed in.

    References
    ----------
    https://en.wikipedia.org/wiki/Pseudorandom_number_generator#Non-uniform_generators

    http://stackoverflow.com/questions/21100716/
    '''
    def __init__(self, x, pdf, randomize_in_bin=True, sort=True):
        if not len(x) == len(pdf):
            raise ValueError('x and pdf must have same number of elements.')
        if not np.all(np.array(pdf) >= 0):
            raise ValueError('pdf cannot have negative elements.')

        self.x = np.asarray(x)
        self.bin_width = np.hstack(([0], np.diff(x)))
        if not np.all(self.bin_width >=0):
            raise ValueError('x must be input in increasing order.')
        pdf = np.asarray(pdf) * self.bin_width
        self.sort = sort
        self.randomize_in_bin = randomize_in_bin

        # sort the pdf - otherwise bins with small numbers might be lost to round-off errors
        # idea is from http://stackoverflow.com/questions/21100716/
        if self.sort:
            self.sortindex = np.argsort(pdf)
            self.pdf = pdf[self.sortindex]
        else:
            self.pdf = pdf
        # cumulative distribution function
        self.cdf = np.cumsum(self.pdf)


[docs]
    def __call__(self, N):
        """Draw from the distribution function. See docstring of class."""
        #pick numbers which are uniformly random over the cumulative distribution function
        choice = np.random.uniform(high=self.cdf[-1], size=N)
        # Now here is the difficult and comparatively expensive part:
        # We need a reverse lookup to find the bin in the cdf so that we an use it
        # to map this back to the x values of the pdf
        # np.searchsorted is a binary search with O(log n).
        index = np.searchsorted(self.cdf, choice)
        #if necessary, map the indices back to their original ordering
        if self.sort:
            index = self.sortindex[index]
        if self.randomize_in_bin:
            return self.x[index - 1] + self.bin_width[index] * np.random.rand(N)
        else:
            return self.x[index]
Navigation

Source code for marxs.math.random