# Source code for nengo_dl.op_builders

"""
Build classes for basic Nengo operators.
"""

from collections import defaultdict
from distutils.version import LooseVersion
import logging
import warnings

from nengo.builder.operator import Reset, Copy, ElementwiseInc, DotInc, SimPyFunc
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import gen_sparse_ops

from nengo_dl import utils
from nengo_dl.builder import Builder, OpBuilder
from nengo_dl.compat import tf_compat, SparseDotInc, SparseMatrix

logger = logging.getLogger(__name__)


class ResetInc(Reset):
    """
    A version of `~nengo.builder.operator.Reset` that increments the target
    value rather than setting it.
    """

    @property
    def dst(self):
        """Overridden to return from incs rather than sets."""
        return self.incs[0]
@Builder.register(Reset)
@Builder.register(ResetInc)
class ResetBuilder(OpBuilder):
    """
    Build a group of `~nengo.builder.operator.Reset` operators.
    """

    def __init__(self, ops, signals, config):
        super(ResetBuilder, self).__init__(ops, signals, config)

        logger.debug("val %s", [op.value for op in ops])
        logger.debug("dst %s", [op.dst for op in ops])

        # ResetInc increments its target rather than overwriting it
        self.mode = "inc" if type(ops[0]) == ResetInc else "update"

        # use the simulation float dtype for floating-point reset values
        dtype = np.asarray(ops[0].value).dtype
        if np.issubdtype(dtype, np.floating):
            dtype = signals.dtype.as_numpy_dtype

        # unlike other ops, Reset signals might be spread across multiple
        # bases, which we need to handle
        scatters = defaultdict(list)
        for op in ops:
            scatters[signals[op.dst].key] += [op]
        # self.scatters: list of (combined dst signal, constant value tensor)
        # pairs, one per base
        self.scatters = []
        for group in scatters.values():
            # broadcast each op's reset value up to its dst shape, then
            # concatenate all the values in this base into one array
            value = np.concatenate(
                [
                    np.resize(np.asarray(x.value).astype(dtype), x.dst.shape)
                    for x in group
                ],
                axis=0,
            )
            # tile the value along a trailing minibatch dimension
            value = np.tile(
                value[..., None],
                tuple(1 for _ in value.shape) + (signals.minibatch_size,),
            )
            self.scatters += [
                (signals.combine([x.dst for x in group]), signals.constant(value))
            ]

        logger.debug("scatters")
        logger.debug("\n".join([str(x) for x in self.scatters]))

    def build_step(self, signals):
        # write (or increment) the precomputed constants into each base
        for data, val in self.scatters:
            signals.scatter(data, val, mode=self.mode)

    @staticmethod
    def mergeable(x, y):
        # any two Reset ops can be merged (multi-base case handled above)
        return True
@Builder.register(Copy)
class CopyBuilder(OpBuilder):
    """
    Build a group of `~nengo.builder.operator.Copy` operators.
    """

    def __init__(self, ops, signals, config):
        super(CopyBuilder, self).__init__(ops, signals, config)

        logger.debug("src %s", [op.src for op in ops])
        logger.debug("src_slice %s", [getattr(op, "src_slice", None) for op in ops])
        logger.debug("dst %s", [op.dst for op in ops])
        logger.debug("dst_slice %s", [getattr(op, "dst_slice", None) for op in ops])

        self.mode = "inc" if ops[0].inc else "update"

        # collect the (sliced) source/destination views of each op and
        # merge them into single combined signals
        self.src_data = signals.combine(
            [signals[op.src][op.src_slice] for op in ops]
        )
        self.dst_data = signals.combine(
            [signals[op.dst][op.dst_slice] for op in ops]
        )

        if self.dst_data.minibatched and not self.src_data.minibatched:
            # broadcast indices so that the un-minibatched src data gets
            # copied to each minibatch dimension in dst
            self.src_data = self.src_data.broadcast(-1, signals.minibatch_size)

    def build_step(self, signals):
        # read the merged source values and write them into the destination
        src_vals = signals.gather(self.src_data)
        signals.scatter(self.dst_data, src_vals, mode=self.mode)

    @staticmethod
    def mergeable(x, y):
        # any two Copy ops can be merged
        return True
# class ElementwiseSet(ElementwiseInc): # @property # def Y(self): # return self.sets[0]
@Builder.register(ElementwiseInc)
# @Builder.register(ElementwiseSet)
class ElementwiseIncBuilder(OpBuilder):
    """
    Build a group of `~nengo.builder.operator.ElementwiseInc` operators.
    """

    def __init__(self, ops, signals, config):
        super(ElementwiseIncBuilder, self).__init__(ops, signals, config)

        logger.debug("dst %s", [op.Y for op in ops])
        logger.debug("A %s", [op.A for op in ops])
        logger.debug("X %s", [op.X for op in ops])

        self.mode = "inc" if type(ops[0]) == ElementwiseInc else "update"

        self.Y_data = signals.combine([op.Y for op in ops])

        # merge all the A's and all the X's into combined signals
        self.A_data = signals.combine([op.A for op in ops])
        self.X_data = signals.combine([op.X for op in ops])

        if self.A_data.shape[0] != self.X_data.shape[0]:
            # separate the data from each op along the first dimension
            self.A_data = self.A_data.reshape(
                (len(ops), -1) + self.A_data.shape[1:]
            )
            self.X_data = self.X_data.reshape(
                (len(ops), -1) + self.X_data.shape[1:]
            )

        # append empty trailing dimensions to A for elementwise broadcasting
        missing_dims = self.X_data.ndim - self.A_data.ndim
        if missing_dims > 0:
            self.A_data = self.A_data.reshape(
                self.A_data.shape + (1,) * missing_dims
            )

        if self.X_data.minibatched and not self.A_data.minibatched:
            # add a broadcast dimension for the minibatch
            self.A_data = self.A_data.reshape(self.A_data.shape + (1,))

    def build_step(self, signals):
        # elementwise (broadcast) product of A and X, accumulated into Y
        product = tf.multiply(
            signals.gather(self.A_data), signals.gather(self.X_data)
        )
        signals.scatter(self.Y_data, product, mode=self.mode)

    @staticmethod
    def mergeable(x, y):
        # for these operations we enforce that the first dimensions
        # match (we know all the other dimensions match due to the generic
        # checks).
        # this allows us to stack all the arguments into continuous array
        # blocks, allowing for more efficient multiplication (mainly
        # because it allows us to take advantage of broadcasting)
        def first_dim(sig):
            return sig.shape[0] if sig.shape != () else 1

        return all(
            first_dim(s0) == first_dim(s1)
            for s0, s1 in zip(x.all_signals, y.all_signals)
        )
def sparse_matmul(A_indices, A_data, A_shape, X):
    """
    Matrix multiplication between sparse matrix A and dense matrix X.

    Parameters
    ----------
    A_indices : ``tf.Tensor``
        (N, 2) array of [row, col] indices of the non-zero entries of A
    A_data : ``tf.Tensor``
        (N,) array of data in the nonzero entries specified in ``A_indices``
    A_shape : tuple of int
        Shape of full A matrix
    X : ``tf.Tensor``
        Dense matrix being multiplied by A

    Returns
    -------
    dot : ``tf.Tensor``
        Result of matrix multiplication between A and X
    """

    # the GPU kernel only supports float32, so we need to downcast (with a
    # warning) if the data would otherwise run as float64 on the GPU
    must_downcast = A_data.dtype.base_dtype != tf.float32 and (
        "gpu" in A_data.device.lower()
        or (A_data.device == "" and utils.tf_gpu_installed)
    )
    if must_downcast:
        assert A_data.dtype.base_dtype == X.dtype.base_dtype
        warnings.warn(
            "Downcasting data to float32 in sparse_matmul, since "
            "only float32 is supported on the GPU."
        )
        A = tf.cast(A_data, tf.float32)
        X = tf.cast(X, tf.float32)
    else:
        A = A_data

    # the raw sparse matmul op was made public (renamed) in TF 1.7
    if LooseVersion(tf.__version__) < LooseVersion("1.7.0"):
        mat_mul = gen_sparse_ops._sparse_tensor_dense_mat_mul
    else:
        mat_mul = gen_sparse_ops.sparse_tensor_dense_mat_mul

    dot = mat_mul(A_indices, A, A_shape, X)

    if must_downcast:
        # restore the caller's original dtype so downstream ops are unaffected
        dot = tf.cast(dot, A_data.dtype.base_dtype)

    return dot
# class DotSet(DotInc): # @property # def Y(self): # return self.sets[0]
@Builder.register(DotInc)
# @Builder.register(DotSet)
class DotIncBuilder(OpBuilder):
    """
    Build a group of `~nengo.builder.operator.DotInc` operators.
    """

    def __init__(self, ops, signals, config):
        super(DotIncBuilder, self).__init__(ops, signals, config)

        logger.debug("dst %s", [op.Y for op in ops])
        logger.debug("A %s", [op.A for op in ops])
        logger.debug("X %s", [op.X for op in ops])

        self.mode = "inc" if type(ops[0]) == DotInc else "update"

        # check if all the signals have the same size for the first dimension
        self.len_match = True
        for i, s0 in enumerate(ops[0].all_signals):
            shape0 = s0.shape[0] if s0.shape != () else 1
            for op in ops:
                s1 = op.all_signals[i]
                shape1 = s1.shape[0] if s1.shape != () else 1
                if shape0 != shape1:
                    self.len_match = False
                    break
            if not self.len_match:
                break

        self.Y_data = signals.combine([op.Y for op in ops])

        # group all the A's and X's
        A_data = signals.combine([op.A for op in ops])
        X_data = signals.combine([op.X for op in ops])

        if self.len_match:
            # if the first dimensions all match, then we can use the
            # (batched) matrix multiplication op

            # separate data from each op along the first dimension
            self.A_data = A_data.reshape((len(ops), -1, A_data.shape[1]))
            self.X_data = X_data.reshape((len(ops), -1))

            if self.A_data.minibatched:
                # add broadcast dimension to X
                self.X_data = self.X_data.reshape(self.X_data.shape + (1,))

                # precompute transposition indices (used in build_step to
                # move the minibatch dimension for the batched matmul)
                self.perm = tf.constant((0, 3, 1, 2))
                self.perm_inv = tf.constant((0, 2, 3, 1))
        else:
            # if the first dimensions don't match, then we create a block
            # diagonal matrix out of all the op matrices, and then multiply
            # them using a sparse matrix multiplication
            self.A_data = A_data.reshape((-1,))
            self.X_data = X_data

            assert not self.A_data.minibatched
            assert self.X_data.minibatched and self.Y_data.minibatched

            # build the [row, col] indices of each op's dense block, offset
            # so that the blocks lie along the diagonal of the combined matrix
            sparse_indices = []
            corner = np.zeros(2, dtype=np.int64)
            for op in ops:
                block_shape = (op.A.shape[0], op.A.shape[1])
                idxs = np.reshape(
                    np.dstack(
                        np.meshgrid(
                            np.arange(block_shape[0]),
                            np.arange(block_shape[1]),
                            indexing="ij",
                        )
                    ),
                    (-1, 2),
                )
                idxs += corner
                corner += block_shape
                sparse_indices += [idxs]

            sparse_indices = np.concatenate(sparse_indices, axis=0)
            # use int32 indices when they fit, for efficiency
            self.sparse_indices = signals.constant(
                sparse_indices,
                dtype=(
                    tf.int32
                    if np.all(sparse_indices < np.iinfo(np.int32).max)
                    else tf.int64
                ),
            )
            # `corner` has accumulated the overall block-diagonal matrix shape
            self.A_shape = tf.constant(corner, dtype=tf.int64)

    def build_step(self, signals):
        A = signals.gather(self.A_data)
        X = signals.gather(self.X_data)

        if self.len_match:
            if self.A_data.minibatched and self.X_data.minibatched:
                # dot = tf.einsum("ijkl,ikl->ijl", A, X)

                # note: this is just a duplicate of what einsum does
                # internally; we do it manually so that we can move the
                # perm/perm_inv constants into the pre-build step
                A = tf.transpose(a=A, perm=self.perm)
                X = tf.transpose(a=X, perm=self.perm)
                dot = tf.matmul(A, X)
                dot = tf.transpose(a=dot, perm=self.perm_inv)
                dot.set_shape(self.A_data.shape[:2] + (1, signals.minibatch_size))
            elif not self.A_data.minibatched and self.X_data.minibatched:
                dot = tf.matmul(A, X)
            else:
                # note: these cases never come up (so far) in nengo, since X
                # is always minibatched. but preserving them here for
                # posterity, in case they are ever used

                # A minibatched, X not minibatched
                # dot = tf.einsum("ijkl,ik->ijl", A, X)

                # A not minibatched, X not minibatched
                # dot = tf.einsum("ijk,ik->ij", A, X)
                raise NotImplementedError
        else:
            # multiply through the precomputed block-diagonal sparse matrix
            dot = sparse_matmul(self.sparse_indices, A, self.A_shape, X)
            dot.set_shape(self.Y_data.shape + (signals.minibatch_size,))

        signals.scatter(self.Y_data, dot, mode=self.mode)

    @staticmethod
    def mergeable(x, y):
        # if the matrix (A) is minibatched, then the first dimensions need
        # to match up (to allow us to transpose the dimensions)
        if x.A.minibatched:
            for s0, s1 in zip(x.all_signals, y.all_signals):
                shape0 = s0.shape[0] if s0.shape != () else 1
                shape1 = s1.shape[0] if s1.shape != () else 1
                if shape0 != shape1:
                    return False
        return True
@Builder.register(SimPyFunc)
class SimPyFuncBuilder(OpBuilder):
    """
    Build a group of `~nengo.builder.operator.SimPyFunc` operators.
    """

    def __init__(self, ops, signals, config):
        super(SimPyFuncBuilder, self).__init__(ops, signals, config)

        logger.debug("t %s", [op.t for op in ops])
        logger.debug("x %s", [op.x for op in ops])
        logger.debug("fn %s", [op.fn for op in ops])

        # whether the functions receive the simulation time as input
        self.time_input = ops[0].t is not None
        self.input_data = signals.combine([op.x for op in ops])

        if ops[0].output is not None:
            self.output_data = signals.combine([op.output for op in ops])
            self.output_dtype = self.output_data.dtype
        else:
            # ops with no output are run only for their side effects
            self.output_data = None
            self.output_dtype = signals.dtype

        def merged_func(time, inputs):  # pragma: no cover (runs in TF)
            # call each op's python function on its slice of the combined
            # input, once per minibatch item, and concatenate the results
            outputs = []
            offset = 0
            for op in ops:
                if op.output is None:
                    func = op.fn
                else:
                    # wrap fn so its output matches the expected shape/dtype
                    func = utils.align_func(op.output.shape, self.output_dtype)(
                        op.fn
                    )

                func_input = inputs[offset : offset + op.x.shape[0]]
                offset += op.x.shape[0]

                mini_out = []
                for j in range(signals.minibatch_size):
                    if op.t is None:
                        func_out = func(func_input[..., j])
                    else:
                        func_out = func(time, func_input[..., j])

                    if op.output is None:
                        # just return time as a noop (since we need to
                        # return something)
                        func_out = time
                    mini_out += [func_out]
                outputs += [np.stack(mini_out, axis=-1)]

            return np.concatenate(outputs, axis=0)

        self.merged_func = merged_func
        # give the merged function a debuggable name built from the op fns
        self.merged_func.__name__ = "_".join(
            [utils.function_name(op.fn) for op in ops]
        )
        self.output_shape = (
            (len(ops),) if self.output_data is None else self.output_data.shape
        )
        self.output_shape += (signals.minibatch_size,)

    def build_step(self, signals):
        time = signals.time if self.time_input else []
        inputs = [] if self.input_data is None else signals.gather(self.input_data)

        # py_func must run on the CPU (it executes arbitrary python code)
        with tf.device("/cpu:0"):
            node_outputs = tf_compat.py_func(
                self.merged_func,
                [time, inputs],
                self.output_dtype,
                name=self.merged_func.__name__,
            )
        # py_func loses static shape information, so restore it
        node_outputs.set_shape(self.output_shape)

        if self.output_data is not None:
            signals.scatter(self.output_data, node_outputs)

        # note: we only need to run the node for side effects, not the
        # assignment operator. if the result of the assignment is actually
        # used anywhere, then it will be run as part of the normal graph.
        return node_outputs

    @staticmethod
    def mergeable(x, y):
        # for these we need to make a special check that the functions
        # all do/do not get time as input, otherwise we could end
        # up confusing a node that only gets a scalar float input with
        # a node that only gets time as input
        return x.t == y.t
@Builder.register(SparseDotInc)
class SparseDotIncBuilder(OpBuilder):
    """
    Build a group of `~nengo.builder.operator.SparseDotInc` operators.
    """

    def __init__(self, ops, signals, config):
        super().__init__(ops, signals, config)

        self.Y_data = signals.combine([op.Y for op in ops])

        # group all the A's and X's
        self.A_data = signals.combine([op.A for op in ops])
        self.X_data = signals.combine([op.X for op in ops])

        # the only way A would be minibatched is if it is targeted by an
        # online learning rule, which isn't supported for sparse transforms
        assert not self.A_data.minibatched

        assert self.X_data.minibatched and self.Y_data.minibatched

        # arrange the sparse matrices into a (sparse) block diagonal matrix
        # by adding an offset to each sparse matrix's indices
        sparse_indices = []
        corner = np.zeros(2, dtype=np.int64)
        for op in ops:
            # extract the [row, col] nonzero indices of this op's matrix;
            # A may be stored as a nengo SparseMatrix or a scipy sparse matrix
            if isinstance(op.A.initial_value, SparseMatrix):
                idxs = np.array(op.A.initial_value.indices)
            else:
                initial_value = op.A.initial_value.tocoo()
                idxs = np.stack((initial_value.row, initial_value.col), axis=1)

            block_shape = (op.A.shape[0], op.A.shape[1])
            idxs += corner
            corner += block_shape
            sparse_indices += [idxs]

        sparse_indices = np.concatenate(sparse_indices, axis=0)
        # use int32 indices when they fit, for efficiency
        self.sparse_indices = signals.constant(
            sparse_indices,
            dtype=(
                tf.int32
                if np.all(sparse_indices < np.iinfo(np.int32).max)
                else tf.int64
            ),
        )
        # `corner` has accumulated the overall block-diagonal matrix shape
        self.A_shape = tf.constant(corner, dtype=tf.int64)

    def build_step(self, signals):
        A = signals.gather(self.A_data)
        X = signals.gather(self.X_data)

        # multiply through the block-diagonal sparse matrix and restore the
        # static shape information lost by the sparse op
        dot = sparse_matmul(self.sparse_indices, A, self.A_shape, X)
        dot.set_shape(self.Y_data.shape + (signals.minibatch_size,))

        signals.scatter(self.Y_data, dot, mode="inc")

    @staticmethod
    def mergeable(x, y):
        # any two SparseDotInc ops can be merged
        return True