from __future__ import absolute_import
import warnings
import numpy as np
from nengo.exceptions import ValidationError
from nengo.params import (BoolParam, IntParam, NdarrayParam, NumberParam,
Parameter, Unconfigurable, FrozenObject)
import nengo.utils.numpy as npext
[docs]class Distribution(FrozenObject):
"""A base class for probability distributions.
The only thing that a probabilities distribution need to define is a
`.sample` method. This base class ensures that all distributions
accept the same arguments for the sample function.
"""
def _sample_shape(self, n, d=None):
"""Returns output shape for sample method."""
return (n,) if d is None else (n, d)
[docs] def sample(self, n, d=None, rng=np.random):
"""Samples the distribution.
Parameters
----------
n : int
Number samples to take.
d : int or None, optional (Default: None)
The number of dimensions to return. If this is an int, the return
value will be of shape ``(n, d)``. If None, the return
value will be of shape ``(n,)``.
rng : `numpy.random.RandomState`, optional
Random number generator state.
Returns
-------
samples : (n,) or (n, d) array_like
Samples as a 1d or 2d array depending on ``d``. The second
dimension enumerates the dimensions of the process.
"""
raise NotImplementedError("Distributions should implement sample.")
[docs]class PDF(Distribution):
"""An arbitrary distribution from a PDF.
Parameters
----------
x : vector_like (n,)
Values of the points to sample from (interpolated).
p : vector_like (n,)
Probabilities of the `x` points.
"""
x = NdarrayParam('x', shape='*')
p = NdarrayParam('p', shape='*')
def __init__(self, x, p):
super(PDF, self).__init__()
psum = np.sum(p)
if np.abs(psum - 1) > 1e-8:
raise ValidationError(
"PDF must sum to one (sums to %f)" % psum, attr='p', obj=self)
self.x = x
self.p = p
if len(self.x) != len(self.p):
raise ValidationError(
"`x` and `p` must be the same length", attr='p', obj=self)
# make cumsum = [0] + cumsum, cdf = 0.5 * (cumsum[:-1] + cumsum[1:])
cumsum = np.cumsum(p)
cumsum *= 0.5
cumsum[1:] = cumsum[:-1] + cumsum[1:]
self.cdf = cumsum
def __repr__(self):
return "PDF(x=%r, p=%r)" % (self.x, self.p)
def sample(self, n, d=None, rng=np.random):
shape = self._sample_shape(n, d)
return np.interp(rng.uniform(size=shape), self.cdf, self.x)
[docs]class Gaussian(Distribution):
"""A Gaussian distribution.
This represents a bell-curve centred at ``mean`` and with
spread represented by the standard deviation, ``std``.
Parameters
----------
mean : Number
The mean of the Gaussian.
std : Number
The standard deviation of the Gaussian.
Raises
------
ValidationError if std is <= 0
"""
mean = NumberParam('mean')
std = NumberParam('std', low=0, low_open=True)
def __init__(self, mean, std):
super(Gaussian, self).__init__()
self.mean = mean
self.std = std
def __repr__(self):
return "Gaussian(mean=%r, std=%r)" % (self.mean, self.std)
def sample(self, n, d=None, rng=np.random):
shape = self._sample_shape(n, d)
return rng.normal(loc=self.mean, scale=self.std, size=shape)
[docs]class Exponential(Distribution):
"""An exponential distribution (optionally with high values clipped).
If ``high`` is left to its default value of infinity, this is a standard
exponential distribution. If ``high`` is set, then any sampled values at
or above ``high`` will be clipped so they are slightly below ``high``.
This is useful for thresholding and, by extension,
`.networks.AssociativeMemory`.
The probability distribution function (PDF) is given by::
| 0 if x < shift
p(x) = | 1/scale * exp(-(x - shift)/scale) if x >= shift and x < high
| n if x == high - eps
| 0 if x >= high
where `n` is such that the PDF integrates to one, and `eps` is an
infintesimally small number such that samples of `x` are strictly less than
`high` (in practice, `eps` depends on the floating point precision).
Parameters
----------
scale : float
The scale parameter (inverse of the rate parameter lambda). Larger
values make the distribution narrower (sharper peak).
shift : float, optional (Default: 0)
Amount to shift the distribution by. There will be no values smaller
than this shift when sampling from the distribution.
high : float, optional (Default: np.inf)
All values larger than or equal to this value will be clipped to
slightly less than this value.
"""
scale = NumberParam('scale', low=0, low_open=True)
shift = NumberParam('shift')
high = NumberParam('high')
def __init__(self, scale, shift=0., high=np.inf):
super(Exponential, self).__init__()
self.scale = scale
self.shift = shift
self.high = high
def sample(self, n, d=None, rng=np.random):
shape = self._sample_shape(n, d)
x = rng.exponential(self.scale, shape) + self.shift
high = np.nextafter(self.high, np.asarray(-np.inf, dtype=x.dtype))
return np.clip(x, self.shift, high)
[docs]class Choice(Distribution):
"""Discrete distribution across a set of possible values.
The same as `numpy.random.choice`, except can take vector or matrix values
for the choices.
Parameters
----------
options : (N, ...) array_like
The options (choices) to choose between. The choice is always done
along the first axis, so if `options` is a matrix, the options are
the rows of that matrix.
weights : (N,) array_like, optional (Default: None)
Weights controlling the probability of selecting each option. Will
automatically be normalized. If None, weights be uniformly distributed.
"""
options = NdarrayParam('options', shape=('*', '...'))
weights = NdarrayParam('weights', shape=('*'), optional=True)
def __init__(self, options, weights=None):
super(Choice, self).__init__()
self.options = options
self.weights = weights
weights = (np.ones(len(self.options)) if self.weights is None else
self.weights)
if len(weights) != len(self.options):
raise ValidationError(
"Number of weights (%d) must match number of options (%d)"
% (len(weights), len(self.options)), attr='weights', obj=self)
if not all(weights >= 0):
raise ValidationError("All weights must be non-negative",
attr='weights', obj=self)
total = float(weights.sum())
if total <= 0:
raise ValidationError("Sum of weights must be positive (got %f)"
% total, attr='weights', obj=self)
self.p = weights / total
def __repr__(self):
return "Choice(options=%r%s)" % (
self.options,
"" if self.weights is None else ", weights=%r" % self.weights)
@property
def dimensions(self):
return np.prod(self.options.shape[1:])
def sample(self, n, d=None, rng=np.random):
if d is not None and self.dimensions != d:
raise ValidationError("Options must be of dimensionality %d "
"(got %d)" % (d, self.dimensions),
attr='options', obj=self)
i = np.searchsorted(np.cumsum(self.p), rng.rand(n))
return self.options[i]
[docs]class Samples(Distribution):
"""A set of samples.
This class is a subclass of `.Distribution` so that it can be used in any
situation that calls for a `.Distribution`. However, the call to `.sample`
must match the dimensions of the samples or a `.ValidationError`
will be raised.
Parameters
----------
samples : (n, d) array_like
``n`` and ``d`` must match what is eventually passed to `.sample`.
"""
samples = NdarrayParam('samples', shape=('...',))
def __init__(self, samples):
super(Samples, self).__init__()
self.samples = samples
def __repr__(self):
return "Samples(samples=%r)" % (self.samples,)
def sample(self, n, d=None, rng=np.random):
samples = np.array(self.samples)
shape = (n,) if d is None else (n, d)
if d is None:
samples = samples.squeeze()
if d is not None and samples.ndim == 1:
samples = samples[..., np.newaxis]
if samples.shape[0] != shape[0]:
raise ValidationError("Wrong number of samples requested; got "
"%d, should be %d" % (n, samples.shape[0]),
attr='samples', obj=self)
elif d is None and len(samples.shape) != 1:
raise ValidationError("Wrong sample dimensionality requested; got "
"'None', should be %d" % (samples.shape[1],),
attr='samples', obj=self)
elif d is not None and samples.shape[1] != shape[1]:
raise ValidationError("Wrong sample dimensionality requested; got "
"%d, should be %d" % (d, samples.shape[1]),
attr='samples', obj=self)
return samples
[docs]class SqrtBeta(Distribution):
"""Distribution of the square root of a Beta distributed random variable.
Given ``n + m`` dimensional random unit vectors, the length of subvectors
with ``m`` elements will be distributed according to this distribution.
Parameters
----------
n: int
Number of subvectors.
m: int, optional (Default: 1)
Length of each subvector.
See also
--------
nengo.dists.SubvectorLength
"""
n = IntParam('n', low=0)
m = IntParam('m', low=0)
def __init__(self, n, m=1):
super(SqrtBeta, self).__init__()
self.n = n
self.m = m
def __repr__(self):
return "%s(n=%r, m=%r)" % (type(self).__name__, self.n, self.m)
def sample(self, num, d=None, rng=np.random):
shape = self._sample_shape(num, d)
return np.sqrt(rng.beta(self.m / 2.0, self.n / 2.0, size=shape))
[docs] def cdf(self, x):
"""Cumulative distribution function.
.. note:: Requires SciPy.
Parameters
----------
x : array_like
Evaluation points in [0, 1].
Returns
-------
cdf : array_like
Probability that `X <= x`.
"""
from scipy.special import betainc
sq_x = x * x
return np.where(
sq_x < 1., betainc(self.m / 2.0, self.n / 2.0, sq_x),
np.ones_like(x))
[docs] def pdf(self, x):
"""Probability distribution function.
.. note:: Requires SciPy.
Parameters
----------
x : array_like
Evaluation points in [0, 1].
Returns
-------
pdf : array_like
Probability density at ``x``.
"""
from scipy.special import beta
return (2 / beta(self.m / 2.0, self.n / 2.0) * x ** (self.m - 1) *
(1 - x * x) ** (self.n / 2.0 - 1))
[docs] def ppf(self, y):
"""Percent point function (inverse cumulative distribution).
.. note:: Requires SciPy.
Parameters
----------
y : array_like
Cumulative probabilities in [0, 1].
Returns
-------
ppf : array_like
Evaluation points ``x`` in [0, 1] such that ``P(X <= x) = y``.
"""
from scipy.special import betaincinv
sq_x = betaincinv(self.m / 2.0, self.n / 2.0, y)
return np.sqrt(sq_x)
[docs]class SubvectorLength(SqrtBeta):
"""Distribution of the length of a subvectors of a unit vector.
Parameters
----------
dimensions : int
Dimensionality of the complete unit vector.
subdimensions : int, optional (Default: 1)
Dimensionality of the subvector.
See also
--------
nengo.dists.SqrtBeta
"""
def __init__(self, dimensions, subdimensions=1):
super(SubvectorLength, self).__init__(
dimensions - subdimensions, subdimensions)
def __repr__(self):
return "%s(%r, subdimensions=%r)" % (
type(self).__name__, self.n + self.m, self.m)
[docs]class CosineSimilarity(SubvectorLength):
"""Distribution of the cosine of the angle between two random vectors.
The "cosine similarity" is the cosine of the angle between two vectors,
which is equal to the dot product of the vectors, divided by the L2-norms
of the individual vectors. When these vectors are unit length, this is then
simply the distribution of their dot product.
This is also equivalent to the distribution of a single coefficient from a
unit vector (a single dimension of ``UniformHypersphere(surface=True)``).
Furthermore, ``CosineSimilarity(d+2)`` is equivalent to the distribution of
a single coordinate from points uniformly sampled from the d-dimensional
unit ball (a single dimension of
``UniformHypersphere(surface=False).sample(n, d)``). These relationships
have been detailed in [Voelker2017]_.
This can be used to calculate an intercept ``c = ppf(1 - p)`` such that
``dot(u, v) >= c`` with probability ``p``, for random unit vectors ``u``
and ``v``. In other words, a neuron with intercept ``ppf(1 - p)`` will
fire with probability ``p`` for a random unit length input.
.. [Voelker2017]
`Aaron R. Voelker, Jan Gosmann, and Terrence C. Stewart.
Efficiently sampling vectors and coordinates from the n-sphere and
n-ball. Technical Report, Centre for Theoretical Neuroscience,
Waterloo, ON, 2017
<http://compneuro.uwaterloo.ca/publications/voelker2017.html>`_
Parameters
----------
dimensions: int
Dimensionality of the complete unit vector.
See also
--------
nengo.dists.SqrtBeta
"""
def __init__(self, dimensions):
super(CosineSimilarity, self).__init__(dimensions)
def sample(self, num, d=None, rng=np.random):
shape = self._sample_shape(num, d)
sign = Choice((1, -1)).sample(np.prod(shape), rng=rng).reshape(*shape)
return sign * super(CosineSimilarity, self).sample(num, d, rng=rng)
def cdf(self, x):
return (super(CosineSimilarity, self).cdf(x) * np.sign(x) + 1) / 2.0
def pdf(self, x):
return super(CosineSimilarity, self).pdf(x) / 2.0
def ppf(self, y):
x = super(CosineSimilarity, self).ppf(abs(y*2 - 1))
return np.where(y > 0.5, x, -x)
class DistributionParam(Parameter):
"""A Distribution."""
equatable = True
def coerce(self, instance, dist):
self.check_type(instance, dist, Distribution)
return super(DistributionParam, self).coerce(instance, dist)
class DistOrArrayParam(NdarrayParam):
"""Can be a Distribution or samples from a distribution."""
def __init__(self, name, default=Unconfigurable, sample_shape=None,
optional=False, readonly=None):
super(DistOrArrayParam, self).__init__(
name, default, sample_shape, optional, readonly)
def coerce(self, instance, distorarray):
if isinstance(distorarray, Distribution):
return Parameter.coerce(self, instance, distorarray)
return super(DistOrArrayParam, self).coerce(instance, distorarray)
[docs]def get_samples(dist_or_samples, n, d=None, rng=np.random):
"""Convenience function to sample a distribution or return samples.
Use this function in situations where you accept an argument that could
be a distribution, or could be an ``array_like`` of samples.
Examples
--------
>>> def mean(values, n=100):
... samples = get_samples(values, n=n)
... return np.mean(samples)
>>> mean([1, 2, 3, 4])
2.5
>>> mean(nengo.dists.Gaussian(0, 1))
0.057277898442269548
Parameters
----------
dist_or_samples : `.Distribution` or (n, d) array_like
Source of the samples to be returned.
n : int
Number samples to take.
d : int or None, optional (Default: None)
The number of dimensions to return.
rng : RandomState, optional (Default: np.random)
Random number generator.
Returns
-------
samples : (n, d) array_like
"""
if isinstance(dist_or_samples, Distribution):
return dist_or_samples.sample(n, d=d, rng=rng)
return np.array(dist_or_samples)