Source code for nengo.dists

import warnings

import numpy as np

from nengo.exceptions import ValidationError
from nengo.params import (
    BoolParam,
    IntParam,
    NdarrayParam,
    NumberParam,
    Parameter,
    Unconfigurable,
    FrozenObject,
)
import nengo.utils.numpy as npext


class Distribution(FrozenObject):
    """A base class for probability distributions.

    The only thing that a probability distribution needs to define is a
    `.Distribution.sample` method. This base class ensures that all
    distributions accept the same arguments for the sample function.
    """

    def _sample_shape(self, n, d=None):
        """Returns output shape for sample method."""
        return (n,) if d is None else (n, d)

[docs] def sample(self, n, d=None, rng=np.random): """Samples the distribution. Parameters ---------- n : int Number samples to take. d : int or None, optional The number of dimensions to return. If this is an int, the return value will be of shape ``(n, d)``. If None, the return value will be of shape ``(n,)``. rng : `numpy.random.mtrand.RandomState`, optional Random number generator state. Returns ------- samples : (n,) or (n, d) array_like Samples as a 1d or 2d array depending on ``d``. The second dimension enumerates the dimensions of the process. """ raise NotImplementedError("Distributions should implement sample.")
def get_samples(dist_or_samples, n, d=None, rng=np.random):
    """Convenience function to sample a distribution or return samples.

    Use this function in situations where you accept an argument that could
    be a distribution, or could be an ``array_like`` of samples.

    Examples
    --------

    .. testcode::

       from nengo.dists import get_samples

       rng = np.random.RandomState(seed=0)

       def mean(values, n=100):
           samples = get_samples(values, n=n, rng=rng)
           print("%.4f" % np.mean(samples))

       mean([1, 2, 3, 4])
       mean(nengo.dists.Gaussian(0, 1))

    .. testoutput::

       2.5000
       0.0598

    Parameters
    ----------
    dist_or_samples : `.Distribution` or (n, d) array_like
        Source of the samples to be returned.
    n : int
        Number of samples to take.
    d : int or None, optional
        The number of dimensions to return.
    rng : RandomState, optional
        Random number generator.

    Returns
    -------
    samples : (n, d) array_like
    """
    if isinstance(dist_or_samples, Distribution):
        return dist_or_samples.sample(n, d=d, rng=rng)
    return np.array(dist_or_samples)

class PDF(Distribution):
    """An arbitrary distribution from a PDF.

    Parameters
    ----------
    x : vector_like (n,)
        Values of the points to sample from (interpolated).
    p : vector_like (n,)
        Probabilities of the ``x`` points.
    """

    x = NdarrayParam("x", shape="*")
    p = NdarrayParam("p", shape="*")

    def __init__(self, x, p):
        super().__init__()

        psum = np.sum(p)
        if np.abs(psum - 1) > 1e-8:
            raise ValidationError(
                "PDF must sum to one (sums to %f)" % psum, attr="p", obj=self
            )

        self.x = x
        self.p = p
        if len(self.x) != len(self.p):
            raise ValidationError(
                "`x` and `p` must be the same length", attr="p", obj=self
            )

        # make cumsum = [0] + cumsum, cdf = 0.5 * (cumsum[:-1] + cumsum[1:])
        cumsum = np.cumsum(p)
        cumsum *= 0.5
        cumsum[1:] = cumsum[:-1] + cumsum[1:]
        self.cdf = cumsum

    def sample(self, n, d=None, rng=np.random):
        shape = self._sample_shape(n, d)
        return np.interp(rng.uniform(size=shape), self.cdf, self.x)

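For illustration, sampling from a simple triangular PDF might look like the
following (an informal sketch; the points and seed are assumed values):

    x = np.linspace(-1, 1, 101)
    p = 1 - np.abs(x)
    dist = PDF(x, p / p.sum())  # probabilities must sum to one
    samples = dist.sample(1000, rng=np.random.RandomState(0))  # shape (1000,)
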
class Uniform(Distribution):
    """A uniform distribution.

    It's equally likely to get any scalar between ``low`` and ``high``.

    Note that the order of ``low`` and ``high`` doesn't matter;
    if ``low > high`` this will still work, and ``low`` will still
    be a closed interval while ``high`` is open.

    Parameters
    ----------
    low : Number
        The closed lower bound of the uniform distribution; samples >= low
    high : Number
        The open upper bound of the uniform distribution; samples < high
    integer : boolean, optional
        If true, sample from a uniform distribution of integers. In this
        case, low and high should be integers.
    """

    low = NumberParam("low")
    high = NumberParam("high")
    integer = BoolParam("integer")

    def __init__(self, low, high, integer=False):
        super().__init__()
        self.low = low
        self.high = high
        self.integer = integer

    def sample(self, n, d=None, rng=np.random):
        shape = self._sample_shape(n, d)
        if self.integer:
            return rng.randint(low=self.low, high=self.high, size=shape)
        else:
            return rng.uniform(low=self.low, high=self.high, size=shape)

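A brief usage sketch (bounds and seed are assumed values for illustration):

    Uniform(-1, 1).sample(5, rng=np.random.RandomState(0))  # 5 floats in [-1, 1)
    Uniform(0, 10, integer=True).sample(5, d=2)              # (5, 2) ints in [0, 10)
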
class Gaussian(Distribution):
    """A Gaussian distribution.

    This represents a bell-curve centred at ``mean`` and with
    spread represented by the standard deviation, ``std``.

    Parameters
    ----------
    mean : Number
        The mean of the Gaussian.
    std : Number
        The standard deviation of the Gaussian.

    Raises
    ------
    ValidationError if std is <= 0
    """

    mean = NumberParam("mean")
    std = NumberParam("std", low=0, low_open=True)

    def __init__(self, mean, std):
        super().__init__()
        self.mean = mean
        self.std = std

    def sample(self, n, d=None, rng=np.random):
        shape = self._sample_shape(n, d)
        return rng.normal(loc=self.mean, scale=self.std, size=shape)

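A one-line usage sketch (parameter values and seed assumed for illustration):

    Gaussian(mean=0, std=1).sample(3, d=2, rng=np.random.RandomState(0))  # (3, 2) array
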
class Exponential(Distribution):
    """An exponential distribution (optionally with high values clipped).

    If ``high`` is left to its default value of infinity, this is a standard
    exponential distribution. If ``high`` is set, then any sampled values at
    or above ``high`` will be clipped so they are slightly below ``high``.
    This is useful for thresholding.

    The probability distribution function (PDF) is given by::

               |  0                                 if x < shift
        p(x) = | 1/scale * exp(-(x - shift)/scale)  if x >= shift and x < high
               |  n                                 if x == high - eps
               |  0                                 if x >= high

    where ``n`` is such that the PDF integrates to one, and ``eps`` is an
    infinitesimally small number such that samples of ``x`` are strictly
    less than ``high`` (in practice, ``eps`` depends on the floating point
    precision).

    Parameters
    ----------
    scale : float
        The scale parameter (inverse of the rate parameter lambda). Smaller
        values make the distribution narrower (sharper peak).
    shift : float, optional
        Amount to shift the distribution by. There will be no values smaller
        than this shift when sampling from the distribution.
    high : float, optional
        All values larger than or equal to this value will be clipped to
        slightly less than this value.
    """

    scale = NumberParam("scale", low=0, low_open=True)
    shift = NumberParam("shift")
    high = NumberParam("high")

    def __init__(self, scale, shift=0.0, high=np.inf):
        super().__init__()
        self.scale = scale
        self.shift = shift
        self.high = high

    def sample(self, n, d=None, rng=np.random):
        shape = self._sample_shape(n, d)
        x = rng.exponential(self.scale, shape) + self.shift
        high = np.nextafter(self.high, np.asarray(-np.inf, dtype=x.dtype))
        return np.clip(x, self.shift, high)

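The clipping behaviour can be checked directly, as in this informal sketch
(parameter values and seed are assumptions made for the example):

    dist = Exponential(scale=0.1, shift=0.5, high=1.0)
    x = dist.sample(10000, rng=np.random.RandomState(0))
    assert x.min() >= 0.5 and x.max() < 1.0  # all samples lie in [shift, high)
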
class UniformHypersphere(Distribution):
    """Uniform distribution on or in an n-dimensional unit hypersphere.

    Sample points are uniformly distributed across the volume (default) or
    surface of an n-dimensional unit hypersphere.

    Parameters
    ----------
    surface : bool, optional
        Whether sample points should be distributed uniformly
        over the surface of the hypersphere (True),
        or within the hypersphere (False).
    min_magnitude : Number, optional
        Lower bound on the returned vector magnitudes (such that they are in
        the range ``[min_magnitude, 1]``). Must be in the range [0, 1).
        Ignored if ``surface`` is ``True``.
    """

    surface = BoolParam("surface")
    min_magnitude = NumberParam("min_magnitude", low=0, high=1, high_open=True)

    def __init__(self, surface=False, min_magnitude=0):
        super().__init__()
        if surface and min_magnitude > 0:
            warnings.warn("min_magnitude ignored because surface is True")
        self.surface = surface
        self.min_magnitude = min_magnitude

[docs] def sample(self, n, d=None, rng=np.random): if d is None or d < 1: # check this, since other dists allow d = None raise ValidationError("Dimensions must be a positive integer", "d") samples = rng.randn(n, d) samples /= npext.norm(samples, axis=1, keepdims=True) if self.surface: return samples # Generate magnitudes for vectors from uniform distribution. # The (1 / d) exponent ensures that samples are uniformly distributed # in n-space and not all bunched up at the centre of the sphere. samples *= rng.uniform(low=self.min_magnitude ** d, high=1, size=(n, 1)) ** ( 1.0 / d ) return samples
class Choice(Distribution):
    """Discrete distribution across a set of possible values.

    The same as NumPy random's `~numpy.random.mtrand.RandomState.choice`,
    except can take vector or matrix values for the choices.

    Parameters
    ----------
    options : (N, ...) array_like
        The options (choices) to choose between. The choice is always done
        along the first axis, so if ``options`` is a matrix, the options are
        the rows of that matrix.
    weights : (N,) array_like, optional
        Weights controlling the probability of selecting each option. Will
        automatically be normalized. If None, weights will be uniformly
        distributed.
    """

    options = NdarrayParam("options", shape=("*", "..."))
    weights = NdarrayParam("weights", shape=("*"), optional=True)

    def __init__(self, options, weights=None):
        super().__init__()
        self.options = options
        self.weights = weights

        weights = np.ones(len(self.options)) if self.weights is None else self.weights
        if len(weights) != len(self.options):
            raise ValidationError(
                "Number of weights (%d) must match number of options (%d)"
                % (len(weights), len(self.options)),
                attr="weights",
                obj=self,
            )
        if not all(weights >= 0):
            raise ValidationError(
                "All weights must be non-negative", attr="weights", obj=self
            )
        total = float(weights.sum())
        if total <= 0:
            raise ValidationError(
                "Sum of weights must be positive (got %f)" % total,
                attr="weights",
                obj=self,
            )
        self.p = weights / total

    @property
    def dimensions(self):
        return np.prod(self.options.shape[1:])

    def sample(self, n, d=None, rng=np.random):
        if d is not None and self.dimensions != d:
            raise ValidationError(
                "Options must be of dimensionality %d (got %d)"
                % (d, self.dimensions),
                attr="options",
                obj=self,
            )

        i = np.searchsorted(np.cumsum(self.p), rng.rand(n))
        return self.options[i]

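A usage sketch with matrix-valued options (the options, weights, and seed are
assumptions made for the example):

    options = np.array([[1, 0], [0, 1], [1, 1]])
    dist = Choice(options, weights=[0.7, 0.2, 0.1])
    dist.sample(4, d=2, rng=np.random.RandomState(0))  # 4 rows drawn from options
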
class Samples(Distribution):
    """A set of samples.

    This class is a subclass of `.Distribution` so that it can be used in any
    situation that calls for a `.Distribution`. However, the call to
    `.Distribution.sample` must match the dimensions of the samples or a
    `.ValidationError` will be raised.

    Parameters
    ----------
    samples : (n, d) array_like
        ``n`` and ``d`` must match what is eventually passed to
        `.Distribution.sample`.
    """

    samples = NdarrayParam("samples", shape=("...",))

    def __init__(self, samples):
        super().__init__()
        self.samples = samples

    def sample(self, n, d=None, rng=np.random):
        samples = np.array(self.samples)
        shape = (n,) if d is None else (n, d)

        if d is None:
            samples = samples.squeeze()

        if d is not None and samples.ndim == 1:
            samples = samples[..., np.newaxis]

        if samples.shape[0] != shape[0]:
            raise ValidationError(
                "Wrong number of samples requested; got "
                "%d, should be %d" % (n, samples.shape[0]),
                attr="samples",
                obj=self,
            )
        elif d is None and len(samples.shape) != 1:
            raise ValidationError(
                "Wrong sample dimensionality requested; got "
                "'None', should be %d" % (samples.shape[1],),
                attr="samples",
                obj=self,
            )
        elif d is not None and samples.shape[1] != shape[1]:
            raise ValidationError(
                "Wrong sample dimensionality requested; got "
                "%d, should be %d" % (d, samples.shape[1]),
                attr="samples",
                obj=self,
            )

        return samples

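For illustration, ``Samples`` simply returns the stored array when the
requested shape matches (sketch only; the data are assumed values):

    data = np.arange(6).reshape(3, 2)
    Samples(data).sample(3, d=2)  # returns the 3x2 array unchanged
    Samples(data).sample(4, d=2)  # raises ValidationError (wrong number of samples)
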
class SqrtBeta(Distribution):
    """Distribution of the square root of a Beta distributed random variable.

    Given ``n + m`` dimensional random unit vectors, the length of subvectors
    with ``m`` elements will be distributed according to this distribution.

    Parameters
    ----------
    n : int
        Number of subvectors.
    m : int, optional
        Length of each subvector.

    See also
    --------
    nengo.dists.SubvectorLength
    """

    n = IntParam("n", low=0)
    m = IntParam("m", low=0)

    def __init__(self, n, m=1):
        super().__init__()
        self.n = n
        self.m = m

    def sample(self, num, d=None, rng=np.random):
        shape = self._sample_shape(num, d)
        return np.sqrt(rng.beta(self.m / 2.0, self.n / 2.0, size=shape))

[docs] def cdf(self, x): """Cumulative distribution function. .. note:: Requires SciPy. Parameters ---------- x : array_like Evaluation points in [0, 1]. Returns ------- cdf : array_like Probability that ``X <= x``. """ from scipy.special import betainc # pylint: disable=import-outside-toplevel sq_x = x * x return np.where( sq_x < 1.0, betainc(self.m / 2.0, self.n / 2.0, sq_x), np.ones_like(x) )
[docs] def pdf(self, x): """Probability distribution function. .. note:: Requires SciPy. Parameters ---------- x : array_like Evaluation points in [0, 1]. Returns ------- pdf : array_like Probability density at ``x``. """ from scipy.special import beta # pylint: disable=import-outside-toplevel return ( 2 / beta(0.5 * self.m, 0.5 * self.n) * x ** (self.m - 1) * (1 - x * x) ** (0.5 * self.n - 1) )
[docs] def ppf(self, y): """Percent point function (inverse cumulative distribution). .. note:: Requires SciPy. Parameters ---------- y : array_like Cumulative probabilities in [0, 1]. Returns ------- ppf : array_like Evaluation points ``x`` in [0, 1] such that ``P(X <= x) = y``. """ from scipy.special import betaincinv # pylint: disable=import-outside-toplevel sq_x = betaincinv(self.m / 2.0, self.n / 2.0, y) return np.sqrt(sq_x)
class SubvectorLength(SqrtBeta):
    """Distribution of the length of a subvector of a unit vector.

    Parameters
    ----------
    dimensions : int
        Dimensionality of the complete unit vector.
    subdimensions : int, optional
        Dimensionality of the subvector.

    See also
    --------
    nengo.dists.SqrtBeta
    """

    def __init__(self, dimensions, subdimensions=1):
        super().__init__(dimensions - subdimensions, subdimensions)

    @property
    def dimensions(self):
        return self.n + self.m

    @property
    def subdimensions(self):
        return self.m

class CosineSimilarity(SubvectorLength):
    """Distribution of the cosine of the angle between two random vectors.

    The "cosine similarity" is the cosine of the angle between two vectors,
    which is equal to the dot product of the vectors, divided by the L2-norms
    of the individual vectors. When these vectors are unit length, this is
    then simply the distribution of their dot product.

    This is also equivalent to the distribution of a single coefficient from
    a unit vector (a single dimension of
    ``UniformHypersphere(surface=True)``). Furthermore,
    ``CosineSimilarity(d+2)`` is equivalent to the distribution of a single
    coordinate from points uniformly sampled from the d-dimensional unit ball
    (a single dimension of ``UniformHypersphere(surface=False).sample(n, d)``).
    These relationships have been detailed in [Voelker2017]_.

    This can be used to calculate an intercept ``c = ppf(1 - p)`` such that
    ``dot(u, v) >= c`` with probability ``p``, for random unit vectors ``u``
    and ``v``. In other words, a neuron with intercept ``ppf(1 - p)`` will
    fire with probability ``p`` for a random unit length input.

    .. [Voelker2017] `Aaron R. Voelker, Jan Gosmann, and Terrence C. Stewart.
       Efficiently sampling vectors and coordinates from the n-sphere and
       n-ball. Technical Report, Centre for Theoretical Neuroscience,
       Waterloo, ON, 2017
       <http://compneuro.uwaterloo.ca/publications/voelker2017.html>`_

    Parameters
    ----------
    dimensions : int
        Dimensionality of the complete unit vector.

    See also
    --------
    nengo.dists.SqrtBeta
    """

    def __init__(self, dimensions):
        super().__init__(dimensions)

    def sample(self, num, d=None, rng=np.random):
        shape = self._sample_shape(num, d)
        sign = Choice((1, -1)).sample(np.prod(shape), rng=rng).reshape(*shape)
        return sign * super().sample(num, d, rng=rng)

    def cdf(self, x):
        return (super().cdf(x) * np.sign(x) + 1) / 2.0

    def pdf(self, x):
        return super().pdf(x) / 2.0

    def ppf(self, y):
        x = super().ppf(abs(y * 2 - 1))
        return np.where(y > 0.5, x, -x)

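The intercept calculation described in the class docstring might look like
this in practice (requires SciPy; the dimensionality and the 10% firing
probability are assumed values):

    d = 64
    p = 0.1
    c = CosineSimilarity(d).ppf(1 - p)
    # A neuron with intercept c fires for roughly 10% of random unit vectors.
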
class DistributionParam(Parameter):
    """A Distribution."""

    equatable = True

    def coerce(self, instance, dist):
        self.check_type(instance, dist, Distribution)
        return super().coerce(instance, dist)

class DistOrArrayParam(NdarrayParam):
    """Can be a Distribution or samples from a distribution."""

    def __init__(
        self,
        name,
        default=Unconfigurable,
        sample_shape=None,
        sample_dtype=np.float64,
        optional=False,
        readonly=None,
    ):
        super().__init__(
            name=name,
            default=default,
            shape=sample_shape,
            dtype=sample_dtype,
            optional=optional,
            readonly=readonly,
        )

    def coerce(self, instance, distorarray):
        if isinstance(distorarray, Distribution):
            return Parameter.coerce(self, instance, distorarray)
        return super().coerce(instance, distorarray)

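For illustration, a parameter declared this way accepts either a
distribution or an array of samples, following the same declaration pattern
the distributions above use for their own parameters (the ``Thing`` class is
hypothetical, a sketch only):

    class Thing(FrozenObject):
        values = DistOrArrayParam("values", sample_shape=("*",))

        def __init__(self, values):
            super().__init__()
            self.values = values

    Thing(Uniform(-1, 1))   # stores the Distribution itself
    Thing([1.0, 2.0, 3.0])  # stores a 1-d array of samples
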