Source code for larray.random

# Portions (part of the docstrings) of this file come from numpy/random/mtrand/mtrand.pyx
# that file (and thus those portions) are licensed under the terms below

# mtrand.pyx -- A Pyrex wrapper of Jean-Sebastien Roy's RandomKit
#
# Copyright 2005 Robert Kern (robert.kern@gmail.com)
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import numpy as np

from larray.core.axis import Axis, AxisCollection       # noqa: F401
from larray.core.array import Array, asarray
from larray.core.array import raw_broadcastable
import larray as la                                     # noqa: F401


__all__ = ['randint', 'normal', 'uniform', 'permutation', 'choice']


def generic_random(np_func, args, min_axes, meta) -> Array:
    args, res_axes = raw_broadcastable(args, min_axes=min_axes)
    res_data = np_func(*args, size=res_axes.shape)
    return Array(res_data, res_axes, meta=meta)


# We choose to place the axes argument in place of the numpy size argument, instead of having axes as the first
# argument, because that would make it ugly for scalars. As a consequence, it is slightly ugly when arguments
# before axes are not required.
[docs]def randint(low, high=None, axes=None, dtype='l', meta=None) -> Array:
    r"""Return random integers from `low` (inclusive) to `high` (exclusive).

    Return random integers from the "discrete uniform" distribution of the specified dtype in the "half-open" interval
    [`low`, `high`). If `high` is None (the default), then results are from [0, `low`).

    Parameters
    ----------
    low : int
        Lowest (signed) integer to be drawn from the distribution (unless ``high=None``, in which case this parameter
        is one above the *highest* such integer).
    high : int, optional
        If provided, one above the largest (signed) integer to be drawn from the distribution (see above for behavior
        if ``high=None``).
    axes : int, tuple of int, str, Axis or tuple/list/AxisCollection of Axis, optional
        Axes (or shape) of the resulting array. If ``axes`` is None (the default), a single value is returned.
        Otherwise, if the resulting axes have a shape of, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn.
    dtype : data-type, optional
        Desired dtype of the result. All dtypes are determined by their name, i.e., 'int64', 'int', etc, so byteorder
        is not available and a specific precision may have different C types depending on the platform.
        The default value is 'np.int'.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    Generate a single int between 0 and 9, inclusive:

    >>> la.random.randint(10)                                   # doctest: +SKIP
    6

    Generate an array of 10 ints between 1 and 5, inclusive:

    >>> la.random.randint(1, 6, 10)                             # doctest: +SKIP
    {0}*  0  1  2  3  4  5  6  7  8  9
          1  1  5  1  5  4  3  4  2  1

    Generate a 2 x 3 array of ints between 0 and 4, inclusive:

    >>> la.random.randint(5, axes=(2, 3))                       # doctest: +SKIP
    {0}*\{1}*  0  1  2
            0  4  4  1
            1  1  2  2
    >>> la.random.randint(5, axes='a=a0,a1;b=b0..b2')           # doctest: +SKIP
    a\b  b0  b1  b2
     a0   0   3   1
     a1   4   0   1

    With varying low and high (each depending on a different axis)

    >>> low = la.sequence('a=a0,a1')
    >>> low
    a  a0  a1
        0   1
    >>> high = la.sequence('b=b0..b2', initial=3)
    >>> high
    b  b0  b1  b2
        3   4   5

    In other words, we want to generate values between low and high (high included) for each cell. Let's
    note that low..high:

    a\b    b0    b1    b2
     a0  0..2  0..3  0..4
     a1  1..2  1..3  1..4

    >>> la.random.randint(low, high)                            # doctest: +SKIP
    a\b  b0  b1  b2
     a0   0   2   2
     a1   2   3   4
    """
    return generic_random(np.random.randint, (low, high), axes, meta)


[docs]def normal(loc=0.0, scale=1.0, axes=None, meta=None) -> Array:
    r"""
    Draw random samples from a normal (Gaussian) distribution.

    Its probability density function is often called the bell curve because of its characteristic shape (see the
    example below)

    Parameters
    ----------
    loc : float or array_like of floats
        Mean ("centre") of the distribution.
    scale : float or array_like of floats
        Standard deviation (spread or "width") of the distribution.
    axes : int, tuple of int, str, Axis or tuple/list/AxisCollection of Axis, optional
        Minimum axes the resulting array must have. Defaults to None. The resulting array axes will be the union of
        those mentioned in ``axes`` and those of ``loc`` and ``scale``. If ``loc`` and ``scale`` are scalars and
        ``axes`` is None, a single value is returned. Otherwise, if the resulting axes have a shape of, e.g.,
        ``(m, n, k)``, then ``m * n * k`` samples are drawn.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array or scalar
        Drawn samples from the parameterized normal distribution.

    Notes
    -----
    The normal distributions occurs often in nature.  For example, it describes the commonly occurring distribution of
    samples influenced by a large number of tiny, random disturbances, each with its own unique distribution [2]_.

    The probability density function for the Gaussian distribution, first derived by De Moivre and 200 years later by
    both Gauss and Laplace independently [2]_, is

    .. math:: p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }}
                     e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} },

    where :math:`\mu` is the mean and :math:`\sigma` the standard deviation. The square of the standard deviation,
    :math:`\sigma^2`, is called the variance.

    The function has its peak at the mean, and its "spread" increases with the standard deviation (the function reaches
    0.607 times its maximum at :math:`x + \sigma` and :math:`x - \sigma` [2]_).  This implies that
    `la.random.normal` is more likely to return samples lying close to the mean, rather than those far away.

    References
    ----------
    .. [1] Wikipedia, "Normal distribution",
           http://en.wikipedia.org/wiki/Normal_distribution
    .. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability,
           Random Variables and Random Signal Principles", 4th ed., 2001, pp. 51, 51, 125.

    Examples
    --------
    Generate a 2 x 3 array with numbers drawn from the distribution:

    >>> la.random.normal(0, 1, axes=(2, 3))                                         # doctest: +SKIP
    {0}*\{1}*                   0                     1                   2
            0  0.3564325741877542    0.8944149721039006  1.7206904920773107
            1  0.6904447654719367  -0.09395966570976753   0.185136309092257

    With named and labelled axes

    >>> la.random.normal(0, 1, axes='a=a0,a1;b=b0..b2')                             # doctest: +SKIP
    a\b                  b0                   b1                   b2
     a0  2.3096106652701827  -0.4269082412118316  -1.0862791566867225
     a1  0.8598817639620348   -2.386411240813283  0.10116503197279443

    With varying loc and scale (each depending on a different axis)

    >>> mu = la.sequence('a=a0,a1', initial=5, inc=5)
    >>> mu
    a  a0  a1
        5  10
    >>> sigma = la.sequence('b=b0..b2', initial=1)
    >>> sigma
    b  b0  b1  b2
        1   2   3
    >>> la.random.normal(mu, sigma)                                                 # doctest: +SKIP
    a\b                  b0                  b1                  b2
     a0   5.939369790854615  2.5043856460438403    8.33560126941519
     a1  10.759526714752091  10.093213549397403  11.705881778249683

    Draw 1000 samples from the distribution:

    >>> mu, sigma = 0, 0.1  # mean and standard deviation
    >>> sample = la.random.normal(mu, sigma, 1000)

    Verify the mean and the variance:

    >>> abs(mu - la.mean(sample)) < 0.01
    True
    >>> abs(sigma - la.std(sample, ddof=1)) < 0.01
    True

    Display the histogram of the samples, along with the probability density function:

    >>> import matplotlib.pyplot as plt                                         # doctest: +SKIP
    >>> count, bins, ignored = plt.hist(sample, 30, normed=True)                # doctest: +SKIP
    >>> pdf = 1 / (sigma * la.sqrt(2 * la.pi)) \
    ...       * la.exp(- (bins - mu) ** 2 / (2 * sigma ** 2))                   # doctest: +SKIP
    >>> _ = plt.plot(bins, pdf, linewidth=2, color='r')                         # doctest: +SKIP
    >>> plt.show()                                                              # doctest: +SKIP
    """
    return generic_random(np.random.normal, (loc, scale), axes, meta)


[docs]def uniform(low=0.0, high=1.0, axes=None, meta=None) -> Array:
    r"""
    Draw samples from a uniform distribution.

    Samples are uniformly distributed over the half-open interval ``[low, high)`` (includes low, but excludes high).
    In other words, any value within the given interval is equally likely to be drawn by `uniform`.

    Parameters
    ----------
    low : float or array_like of floats, optional
        Lower boundary of the output interval.  All values generated will be greater than or equal to low.
        Defaults to 0.0.
    high : float or array_like of floats, optional
        Upper boundary of the output interval.  All values generated will be less than high.
        Defaults to 1.0.
    axes : int, tuple of int, str, Axis or tuple/list/AxisCollection of Axis, optional
        Minimum axes the resulting array must have. Defaults to None. The resulting array axes will be the union of
        those mentioned in ``axes`` and those of ``low`` and ``high``. If ``low`` and ``high`` are scalars and
        ``axes`` is None, a single value is returned. Otherwise, if the resulting axes have a shape of, e.g.,
        ``(m, n, k)``, then ``m * n * k`` samples are drawn.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array or scalar
        Drawn samples from the parameterized uniform distribution.

    See Also
    --------
    randint : Discrete uniform distribution, yielding integers.

    Notes
    -----
    The probability density function of the uniform distribution is

    .. math:: p(x) = \frac{1}{b - a}

    anywhere within the interval ``[a, b)``, and zero elsewhere.

    When ``high`` == ``low``, values of ``low`` will be returned.
    If ``high`` < ``low``, the results are officially undefined and may eventually raise an error, i.e. do not rely on
    this function to behave when passed arguments satisfying that inequality condition.

    Examples
    --------
    Generate a single sample from the distribution:

    >>> la.random.uniform()                                                          # doctest: +SKIP
    0.4616049008844396

    Generate a 2 x 3 array with numbers drawn from the distribution:

    >>> la.random.uniform(0, 5, axes=(2, 3))                                         # doctest: +SKIP
    {0}*\{1}*                   0                  1                  2
            0  3.4951791043804192  3.888533056628081  4.347461073315136
            1   2.146211610940853  0.509146487437932  2.790852715735223

    With named and labelled axes

    >>> la.random.uniform(1, 2, axes='a=a0,a1;b=b0..b2')                             # doctest: +SKIP
    a\b                  b0                  b1                  b2
     a0  1.4167729850467825  1.6953091052066793  1.2321770607672526
     a1  1.4386221912579358  1.8480607144284926  1.1726213637670433

    With varying low and high (each depending on a different axis)

    >>> low = la.sequence('a=a0,a1')
    >>> low
    a  a0  a1
        0   1
    >>> high = la.sequence('b=b0..b2', initial=1, inc=0.5)
    >>> high
    b   b0   b1   b2
       1.0  1.5  2.0
    >>> la.random.uniform(low, high)                                                 # doctest: +SKIP
    a\b                   b0                  b1                  b2
     a0  0.44608671494167573   0.948315996350121    1.74189664009661
     a1                  1.0  1.1099944474264194  1.1362792569316835

    Draw 1000 samples from the distribution:

    >>> s = la.random.uniform(-1, 0, 1000)

    All values are within the given interval:

    >>> la.all(s >= -1)
    True
    >>> la.all(s < 0)
    True

    Display the histogram of the samples, along with the probability density function:

    >>> import matplotlib.pyplot as plt                                     # doctest: +SKIP
    >>> count, bins, ignored = plt.hist(s, 15, normed=True)                 # doctest: +SKIP
    >>> _ = plt.plot(bins, np.ones_like(bins), linewidth=2, color='r')      # doctest: +SKIP
    >>> plt.show()                                                          # doctest: +SKIP
    """
    return generic_random(np.random.uniform, (low, high), axes, meta)


[docs]def permutation(x, axis=0) -> Array:
    r"""
    Randomly permute a sequence along an axis, or return a permuted range.

    Parameters
    ----------
    x : int or array_like
        If `x` is an integer, randomly permute ``sequence(x)``.
        If `x` is an array, returns a randomly shuffled copy.
    axis : int, str or Axis, optional
        Axis along which to permute. Defaults to the first axis.

    Returns
    -------
    Array
        Permuted sequence or array range.

    Examples
    --------
    >>> la.random.permutation(10)                               # doctest: +SKIP
    {0}*  0  1  2  3  4  5  6  7  8  9
          6  8  0  9  4  7  1  5  3  2
    >>> la.random.permutation([1, 4, 9, 12, 15])                # doctest: +SKIP
    {0}*  0   1   2  3  4
          1  15  12  9  4
    >>> la.random.permutation(la.ndtest(5))                     # doctest: +SKIP
    a  a3  a1  a2  a4  a0
        3   1   2   4   0
    >>> arr = la.ndtest((3, 3))                                 # doctest: +SKIP
    >>> la.random.permutation(arr)                              # doctest: +SKIP
    a\b  b0  b1  b2
     a1   3   4   5
     a2   6   7   8
     a0   0   1   2
    >>> la.random.permutation(arr, axis='b')                    # doctest: +SKIP
    a\b  b1  b2  b0
     a0   1   2   0
     a1   4   5   3
     a2   7   8   6
    """
    if isinstance(x, (int, np.integer)):
        return Array(np.random.permutation(x))
    else:
        x = asarray(x)
        axis = x.axes[axis]
        g = axis.i[np.random.permutation(len(axis))]
        return x[g]


[docs]def choice(choices=None, axes=None, replace=True, p=None, meta=None) -> Array:
    r"""
    Generate a random sample from given choices.

    Parameters
    ----------
    choices : 1-D array-like or int, optional
        Values to choose from.
        If an array, a random sample is generated from its elements.
        If an int n, the random sample is generated as if choices was la.sequence(n)
        If p is a 1-D Array, choices are taken from its axis.
    axes : int, tuple of int, str, Axis or tuple/list/AxisCollection of Axis, optional
        Axes (or shape) of the resulting array. If ``axes`` is None (the default), a single value is returned.
        Otherwise, if the resulting axes have a shape of, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn.
    replace : boolean, optional
        Whether the sample is with or without replacement.
    p : array-like, optional
        The probabilities associated with each entry in choices.
        If p is a 1-D Array, choices are taken from its axis labels. If p is an N-D Array, each cell represents the
        probability that the combination of labels will occur.
        If not given the sample assumes a uniform distribution over all entries in choices.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array or scalar
        The generated random samples with given ``axes`` (or shape).

    Raises
    ------
    ValueError
        If choices is an int and less than zero, if choices or p are not 1-dimensional,
        if choices is an array-like of size 0, if p is not a vector of probabilities,
        if choices and p have different lengths, or
        if replace=False and the sample size is greater than the population size.

    See Also
    --------
    randint, permutation

    Examples
    --------
    Generate one random value out of given choices (each choice has the same probability of occurring):

    >>> la.random.choice(['hello', 'world', '!'])                                       # doctest: +SKIP
    hello

    With given probabilities:

    >>> la.random.choice(['hello', 'world', '!'], p=[0.1, 0.8, 0.1])                    # doctest: +SKIP
    world

    Generate a 2 x 3 array with given axes and values drawn from the given choices using given probabilities:

    >>> la.random.choice([5, 10, 15], p=[0.3, 0.5, 0.2], axes='a=a0,a1;b=b0..b2')       # doctest: +SKIP
    a\b  b0  b1  b2
     a0  15  10  10
     a1  10   5  10

    Same as above with labels and probabilities given as a one dimensional Array

    >>> proba = Array([0.3, 0.5, 0.2], Axis([5, 10, 15], 'outcome'))                   # doctest: +SKIP
    >>> proba                                                                           # doctest: +SKIP
    outcome    5   10   15
             0.3  0.5  0.2
    >>> choice(p=proba, axes='a=a0,a1;b=b0..b2')                                        # doctest: +SKIP
    a\b  b0  b1  b2
     a0  10  15   5
     a1  10   5  10

    Generate a uniform random sample of size 3 from la.sequence(5):

    >>> la.random.choice(5, 3)                                                          # doctest: +SKIP
    {0}*  0  1  2
          3  2  0
    >>> # This is equivalent to la.random.randint(0, 5, 3)

    Generate a non-uniform random sample of size 3 from the given choices without replacement:

    >>> la.random.choice(['hello', 'world', '!'], 3, replace=False, p=[0.1, 0.6, 0.3])  # doctest: +SKIP
    {0}*      0  1      2
          world  !  hello

    Using an N-dimensional array as probabilities:

    >>> proba = Array([[0.15, 0.25, 0.10],
    ...                 [0.20, 0.10, 0.20]], 'a=a0,a1;b=b0..b2')                        # doctest: +SKIP
    >>> proba                                                                           # doctest: +SKIP
    a\b    b0    b1   b2
     a0  0.15  0.25  0.1
     a1   0.2   0.1  0.2
    >>> choice(p=proba, axes='draw=d0..d5')                                             # doctest: +SKIP
    draw\axis   a   b
           d0  a1  b2
           d1  a1  b1
           d2  a0  b1
           d3  a0  b0
           d4  a1  b2
           d5  a0  b1
    """
    axes = AxisCollection(axes)
    if isinstance(p, Array):
        if choices is not None:
            raise ValueError("choices argument cannot be used when p argument is an Array")

        if p.ndim > 1:
            flat_p = p.data.reshape(-1)
            flat_indices = choice(p.size, axes=axes, replace=replace, p=flat_p)
            return p.axes._flat_lookup(flat_indices)
        else:
            choices = p.axes[0].labels
            p = p.data
    if choices is None:
        raise ValueError("choices argument must be provided unless p is an Array")
    return Array(np.random.choice(choices, axes.shape, replace, p), axes, meta=meta)