Source code for nengo_spa.examine

"""Functions to examine data and vectors produced by SPA."""

from itertools import combinations

import nengo.utils.numpy as npext
import numpy as np
from nengo.exceptions import ValidationError

from nengo_spa.semantic_pointer import SemanticPointer
from nengo_spa.typechecks import is_iterable
from nengo_spa.vocabulary import Vocabulary


def similarity(data, vocab, normalize=False):
    """Return the similarity between simulation data and Semantic Pointers.

    Computes the dot products between all Semantic Pointers in the
    Vocabulary and the simulation data for each timestep. If
    ``normalize=True``, normalizes all vectors to compute the cosine
    similarity.

    Parameters
    ----------
    data : (D,) or (T, D) array_like
        The *D*-dimensional data for *T* timesteps used for comparison.
    vocab : Vocabulary or array_like
        Vocabulary (or list of vectors) used to calculate the similarity
        values.
    normalize : bool, optional
        Whether to normalize all vectors, to compute the cosine similarity.
    """
    if isinstance(data, SemanticPointer):
        data = data.v

    if isinstance(vocab, Vocabulary):
        vectors = vocab.vectors
    elif is_iterable(vocab):
        if isinstance(next(iter(vocab)), SemanticPointer):
            vocab = [p.v for p in vocab]
        vectors = np.array(vocab, copy=False, ndmin=2)
    else:
        raise ValidationError(
            f"{type(vocab).__name__!r} object is not a valid vocabulary",
            attr="vocab",
        )

    dots = np.dot(vectors, data.T)

    if normalize:
        # Zero-norm vectors should return zero, so avoid divide-by-zero error
        eps = np.nextafter(0, 1)  # smallest float above zero
        dnorm = np.maximum(npext.norm(data.T, axis=0, keepdims=True), eps)
        vnorm = np.maximum(npext.norm(vectors, axis=1, keepdims=True), eps)
        if len(dots.shape) == 1:
            vnorm = np.squeeze(vnorm)
        dots /= dnorm
        dots /= vnorm

    return dots.T
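
# Minimal usage sketch for ``similarity`` (the 32-dimensional vocabulary and
# the key names below are illustrative assumptions, not part of this module):
#
#     vocab = Vocabulary(32)
#     vocab.populate("A; B")
#     sims = similarity(vocab["A"], vocab, normalize=True)
#     # ``sims`` has one entry per vocabulary key: sims[0] is 1.0 (cosine of
#     # A with itself) and sims[1] is close to 0 for random 32-D vectors.
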
def pairs(vocab):
    """Return expressions for all possible combinations to bind *vocab*'s keys.

    Examples
    --------
    >>> vocab = nengo_spa.Vocabulary(32)
    >>> vocab.populate('A; B; C')
    >>> sorted(nengo_spa.pairs(vocab))
    ['A*B', 'A*C', 'B*C']
    """
    return set(x + "*" + y for x, y in combinations(vocab.keys(), 2))
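
# Sketch: ``pairs`` can be combined with ``text`` below so that bound pairs
# are also considered when decoding (the keys are again assumptions):
#
#     vocab = Vocabulary(64)
#     vocab.populate("A; B; C")
#     all_terms = sorted(vocab.keys()) + sorted(pairs(vocab))
#     # text(some_vector, vocab, terms=all_terms) will then also report
#     # matches such as "A*B" alongside the plain keys.
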
def text(
    v,
    vocab,
    minimum_count=1,
    maximum_count=None,
    threshold=0.1,
    join=";",
    terms=None,
    normalize=False,
):
    """Return a human-readable text version of the provided vector.

    This is meant to give a quick text version of a vector for display
    purposes. To do this, compute the dot product between the vector and all
    the terms in the vocabulary. The top few terms are chosen for inclusion
    in the text. It will try to only return terms with a match above the
    *threshold*, but will always return at least *minimum_count* and at most
    *maximum_count* terms. Terms are sorted from most to least similar.

    Parameters
    ----------
    v : SemanticPointer or array_like
        The vector to convert into text.
    vocab : Vocabulary
        Vocabulary used to convert the vector into text.
    minimum_count : int, optional
        Always return at least this many terms in the text.
    maximum_count : int, optional
        Never return more than this many terms in the text.
        If None, all terms will be returned.
    threshold : float, optional
        Minimum similarity required for a term to be included; terms with a
        smaller similarity are ignored.
    join : str, optional
        The text separator to use between terms.
    terms : list, optional
        Only consider terms in this list of strings.
    normalize : bool, optional
        Whether to normalize the vector before computing similarity.
    """
    if not isinstance(v, SemanticPointer):
        v = SemanticPointer(v)
    if normalize:
        v = v.normalized()

    if terms is None:
        terms = vocab.keys()
        vectors = vocab.vectors
    else:
        vectors = vocab.parse_n(*terms)

    matches = list(zip(similarity(v, vectors), terms))
    matches.sort()
    matches.reverse()

    r = []
    for m in matches:
        if minimum_count is not None and len(r) < minimum_count:
            r.append(m)
        elif maximum_count is not None and len(r) == maximum_count:
            break
        elif threshold is None or m[0] > threshold:
            r.append(m)
        else:
            break

    return join.join([f"{sim:0.2f}{key}" for (sim, key) in r])
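
# Sketch for ``text`` (random 64-D vectors, so the printed coefficients vary
# slightly around the values shown; the keys are illustrative):
#
#     vocab = Vocabulary(64)
#     vocab.populate("DOG; CAT; FISH")
#     v = 0.8 * vocab["DOG"].v + 0.3 * vocab["CAT"].v
#     text(v, vocab)  # roughly "0.80DOG;0.30CAT"; FISH falls below threshold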