
"""
Benchmark networks and utilities for evaluating NengoDL's performance.
"""

import inspect
import itertools
import os
import random
import time

import click
import matplotlib.pyplot as plt
import nengo
import numpy as np
import tensorflow as tf

import nengo_dl


def cconv(dimensions, neurons_per_d, neuron_type):
    """
    Circular convolution (EnsembleArray) benchmark.

    Parameters
    ----------
    dimensions : int
        Number of dimensions for vector values
    neurons_per_d : int
        Number of neurons to use per vector dimension
    neuron_type : `~nengo.neurons.NeuronType`
        Simulation neuron type

    Returns
    -------
    net : `nengo.Network`
        benchmark network
    """

    with nengo.Network(label="cconv", seed=0) as net:
        net.config[nengo.Ensemble].neuron_type = neuron_type
        net.config[nengo.Ensemble].gain = nengo.dists.Choice([1, -1])
        net.config[nengo.Ensemble].bias = nengo.dists.Uniform(-1, 1)

        net.cconv = nengo.networks.CircularConvolution(
            neurons_per_d, dimensions)

        net.inp_a = nengo.Node([0] * dimensions)
        net.inp_b = nengo.Node([1] * dimensions)
        nengo.Connection(net.inp_a, net.cconv.A)
        nengo.Connection(net.inp_b, net.cconv.B)

        net.p = nengo.Probe(net.cconv.output)

    return net
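
# A minimal usage sketch (not part of the original module): the benchmark
# constructors in this module return plain `nengo.Network` objects, so they
# can be run directly in `nengo_dl.Simulator`. Parameter values here are
# arbitrary illustrations.
#
#   net = cconv(dimensions=4, neurons_per_d=10,
#               neuron_type=nengo.RectifiedLinear())
#   with nengo_dl.Simulator(net) as sim:
#       sim.run_steps(10)
#       output = sim.data[net.p]  # probed circular convolution output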


def integrator(dimensions, neurons_per_d, neuron_type):
    """
    Single integrator ensemble benchmark.

    Parameters
    ----------
    dimensions : int
        Number of dimensions for vector values
    neurons_per_d : int
        Number of neurons to use per vector dimension
    neuron_type : `~nengo.neurons.NeuronType`
        Simulation neuron type

    Returns
    -------
    net : `nengo.Network`
        benchmark network
    """

    with nengo.Network(label="integrator", seed=0) as net:
        net.config[nengo.Ensemble].neuron_type = neuron_type
        net.config[nengo.Ensemble].gain = nengo.dists.Choice([1, -1])
        net.config[nengo.Ensemble].bias = nengo.dists.Uniform(-1, 1)

        net.integ = nengo.networks.EnsembleArray(neurons_per_d, dimensions)
        nengo.Connection(net.integ.output, net.integ.input, synapse=0.01)

        net.inp = nengo.Node([0] * dimensions)
        nengo.Connection(net.inp, net.integ.input, transform=0.01)

        net.p = nengo.Probe(net.integ.output)

    return net


def pes(dimensions, neurons_per_d, neuron_type):
    """
    PES learning rule benchmark.

    Parameters
    ----------
    dimensions : int
        Number of dimensions for vector values
    neurons_per_d : int
        Number of neurons to use per vector dimension
    neuron_type : `~nengo.neurons.NeuronType`
        Simulation neuron type

    Returns
    -------
    net : `nengo.Network`
        benchmark network
    """

    with nengo.Network(label="pes", seed=0) as net:
        net.config[nengo.Ensemble].neuron_type = neuron_type
        net.config[nengo.Ensemble].gain = nengo.dists.Choice([1, -1])
        net.config[nengo.Ensemble].bias = nengo.dists.Uniform(-1, 1)

        net.inp = nengo.Node([1] * dimensions)
        net.pre = nengo.Ensemble(neurons_per_d * dimensions, dimensions)
        net.post = nengo.Node(size_in=dimensions)

        nengo.Connection(net.inp, net.pre)

        conn = nengo.Connection(
            net.pre, net.post, learning_rule_type=nengo.PES())

        # feed an error signal (inp - post) into the learning rule
        nengo.Connection(net.post, conn.learning_rule, transform=-1)
        nengo.Connection(net.inp, conn.learning_rule)

        net.p = nengo.Probe(net.post)

    return net


def basal_ganglia(dimensions, neurons_per_d, neuron_type):
    """
    Basal ganglia network benchmark.

    Parameters
    ----------
    dimensions : int
        Number of dimensions for vector values
    neurons_per_d : int
        Number of neurons to use per vector dimension
    neuron_type : `~nengo.neurons.NeuronType`
        Simulation neuron type

    Returns
    -------
    net : `nengo.Network`
        benchmark network
    """

    with nengo.Network(label="basal_ganglia", seed=0) as net:
        net.config[nengo.Ensemble].neuron_type = neuron_type

        net.inp = nengo.Node([1] * dimensions)
        net.bg = nengo.networks.BasalGanglia(dimensions, neurons_per_d)
        nengo.Connection(net.inp, net.bg.input)

        net.p = nengo.Probe(net.bg.output)

    return net


def mnist(use_tensor_layer=True):
    """
    A network designed to stress-test tensor layers (based on mnist net).

    Parameters
    ----------
    use_tensor_layer : bool
        If True, use individual tensor_layers to build the network, as
        opposed to a single TensorNode containing all layers.

    Returns
    -------
    net : `nengo.Network`
        benchmark network
    """

    with nengo.Network() as net:
        # create node to feed in images
        net.inp = nengo.Node(np.ones(28 * 28))

        if use_tensor_layer:
            nengo_nl = nengo.RectifiedLinear()
            ensemble_params = dict(max_rates=nengo.dists.Choice([100]),
                                   intercepts=nengo.dists.Choice([0]))
            amplitude = 1
            synapse = None

            x = nengo_dl.tensor_layer(
                net.inp, tf.layers.conv2d, shape_in=(28, 28, 1), filters=32,
                kernel_size=3)
            x = nengo_dl.tensor_layer(x, nengo_nl, **ensemble_params)

            x = nengo_dl.tensor_layer(
                x, tf.layers.conv2d, shape_in=(26, 26, 32),
                transform=amplitude, filters=32, kernel_size=3)
            x = nengo_dl.tensor_layer(x, nengo_nl, **ensemble_params)

            x = nengo_dl.tensor_layer(
                x, tf.layers.average_pooling2d, shape_in=(24, 24, 32),
                synapse=synapse, transform=amplitude, pool_size=2, strides=2)

            x = nengo_dl.tensor_layer(x, tf.layers.dense, units=128)
            x = nengo_dl.tensor_layer(x, nengo_nl, **ensemble_params)

            x = nengo_dl.tensor_layer(x, tf.layers.dropout, rate=0.4,
                                      transform=amplitude)

            x = nengo_dl.tensor_layer(x, tf.layers.dense, units=10)
        else:
            nl = tf.nn.relu

            # def softlif_layer(x, sigma=1, tau_ref=0.002, tau_rc=0.02,
            #                   amplitude=1):
            #     # x -= 1
            #     z = tf.nn.softplus(x / sigma) * sigma
            #     z += 1e-10
            #     rates = amplitude / (tau_ref + tau_rc * tf.log1p(1 / z))
            #     return rates

            @nengo_dl.reshaped((28, 28, 1))
            def mnist_node(_, x):  # pragma: no cover
                x = tf.layers.conv2d(x, filters=32, kernel_size=3,
                                     activation=nl)
                x = tf.layers.conv2d(x, filters=32, kernel_size=3,
                                     activation=nl)
                x = tf.layers.average_pooling2d(x, pool_size=2, strides=2)
                x = tf.contrib.layers.flatten(x)
                x = tf.layers.dense(x, 128, activation=nl)
                x = tf.layers.dropout(x, rate=0.4)
                x = tf.layers.dense(x, 10)

                return x

            node = nengo_dl.TensorNode(mnist_node, size_in=28 * 28,
                                       size_out=10)
            x = node
            nengo.Connection(net.inp, node, synapse=None)

        net.p = nengo.Probe(x)

    return net


def spaun(dimensions):
    """
    Builds the Spaun network from [1]_

    Parameters
    ----------
    dimensions : int
        Number of dimensions for vector values

    Returns
    -------
    net : `nengo.Network`
        benchmark network

    References
    ----------
    .. [1] Chris Eliasmith, Terrence C. Stewart, Xuan Choo, Trevor Bekolay,
       Travis DeWolf, Yichuan Tang, and Daniel Rasmussen (2012). A large-scale
       model of the functioning brain. Science, 338:1202-1205.

    Notes
    -----
    This network needs to be installed via

    .. code-block:: bash

        pip install git+https://github.com/drasmuss/spaun2.0.git
    """

    from _spaun.configurator import cfg
    from _spaun.vocabulator import vocab
    from _spaun.experimenter import experiment
    from _spaun.modules.stim import stim_data
    from _spaun.modules.vision import vis_data
    from _spaun.modules.motor import mtr_data
    from _spaun.spaun_main import Spaun

    vocab.sp_dim = dimensions
    cfg.mtr_arm_type = None

    cfg.set_seed(1)
    experiment.initialize(
        "A", stim_data.get_image_ind, stim_data.get_image_label,
        cfg.mtr_est_digit_response_time, "", cfg.rng)
    vocab.initialize(
        stim_data.stim_SP_labels, experiment.num_learn_actions, cfg.rng)
    vocab.initialize_mtr_vocab(mtr_data.dimensions, mtr_data.sps)
    vocab.initialize_vis_vocab(vis_data.dimensions, vis_data.sps)

    return Spaun()


def random_network(dimensions, neurons_per_d, neuron_type, n_ensembles,
                   connections_per_ensemble, seed=0):
    """
    A randomly connected network benchmark.

    Parameters
    ----------
    dimensions : int
        Number of dimensions for vector values
    neurons_per_d : int
        Number of neurons to use per vector dimension
    neuron_type : `~nengo.neurons.NeuronType`
        Simulation neuron type
    n_ensembles : int
        Number of ensembles in the network
    connections_per_ensemble : int
        Outgoing connections from each ensemble
    seed : int
        Seed for random number generation

    Returns
    -------
    net : `nengo.Network`
        benchmark network
    """

    random.seed(seed)
    with nengo.Network(label="random", seed=seed) as net:
        net.inp = nengo.Node([0] * dimensions)
        net.out = nengo.Node(size_in=dimensions)
        net.p = nengo.Probe(net.out)

        ensembles = [
            nengo.Ensemble(neurons_per_d * dimensions, dimensions,
                           neuron_type=neuron_type)
            for _ in range(n_ensembles)]

        # use fixed all-ones decoders (via NoSolver), so that no time is
        # spent solving for decoders during the build
        dec = np.ones((neurons_per_d * dimensions, dimensions))

        for ens in net.ensembles:
            # add a connection to input and output node, so we never have
            # any "orphan" ensembles
            nengo.Connection(net.inp, ens)
            nengo.Connection(ens, net.out,
                             solver=nengo.solvers.NoSolver(dec))

            posts = random.sample(ensembles, connections_per_ensemble)
            for post in posts:
                nengo.Connection(ens, post,
                                 solver=nengo.solvers.NoSolver(dec))

    return net


def run_profile(net, train=False, n_steps=150, do_profile=True, **kwargs):
    """
    Run profiler on a benchmark network.

    Parameters
    ----------
    net : `~nengo.Network`
        The nengo Network to be profiled.
    train : bool
        If True, profile the ``sim.train`` function. Otherwise, profile the
        ``sim.run`` function.
    n_steps : int
        The number of timesteps to run the simulation.
    do_profile : bool
        Whether or not to run profiling

    Notes
    -----
    kwargs will be passed on to `.Simulator`
    """

    with net:
        nengo_dl.configure_settings(inference_only=not train)

    with nengo_dl.Simulator(net, **kwargs) as sim:
        # note: we run a few times to try to eliminate startup overhead (only
        # the data from the last run will be kept)
        if train:
            opt = tf.train.GradientDescentOptimizer(0.001)
            x = np.random.randn(sim.minibatch_size, n_steps,
                                net.inp.size_out)
            y = np.random.randn(sim.minibatch_size, n_steps, net.p.size_in)

            for _ in range(2):
                sim.train({net.inp: x, net.p: y}, optimizer=opt, n_epochs=1,
                          profile=do_profile)

            start = time.time()
            sim.train({net.inp: x, net.p: y}, optimizer=opt, n_epochs=1,
                      profile=do_profile)
            exec_time = time.time() - start
            print("Execution time:", exec_time)
        else:
            for _ in range(2):
                sim.run_steps(n_steps, profile=do_profile)

            start = time.time()
            sim.run_steps(n_steps, profile=do_profile)
            exec_time = time.time() - start
            print("Execution time:", exec_time)

    return exec_time
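
# A hypothetical example of profiling one of the benchmark networks directly
# from Python, rather than through the CLI below (all parameter values are
# arbitrary; `minibatch_size` and `unroll_simulation` are forwarded to
# `nengo_dl.Simulator` via **kwargs):
#
#   net = integrator(dimensions=8, neurons_per_d=16,
#                    neuron_type=nengo.RectifiedLinear())
#   run_profile(net, train=False, n_steps=50, do_profile=False,
#               minibatch_size=1, unroll_simulation=5)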


@click.group(chain=True)
def main():
    """Command-line interface for benchmarks."""


@main.command()
@click.pass_obj
@click.option("--benchmark", default="cconv",
              help="Name of benchmark network")
@click.option("--dimensions", default=128, help="Number of dimensions")
@click.option("--neurons_per_d", default=64, help="Neurons per dimension")
@click.option("--neuron_type", default="RectifiedLinear",
              help="Nengo neuron model")
@click.option("--kwarg", type=str, multiple=True,
              help="Arbitrary kwarg to pass to benchmark network "
                   "(key=value)")
def build(obj, benchmark, dimensions, neurons_per_d, neuron_type, kwarg):
    """Builds one of the benchmark networks"""

    # get benchmark network by name
    benchmark = globals()[benchmark]

    # get the neuron type object from string class name
    try:
        neuron_type = getattr(nengo, neuron_type)()
    except AttributeError:
        neuron_type = getattr(nengo_dl, neuron_type)()

    # set up kwargs
    kwargs = dict((k, int(v)) for k, v in [a.split("=") for a in kwarg])

    # add the special cli kwargs if applicable; note we could just do
    # everything through --kwarg, but it is convenient to have a
    # direct option for the common arguments
    params = inspect.signature(benchmark).parameters
    for kw in ("benchmark", "dimensions", "neurons_per_d", "neuron_type"):
        if kw in params:
            kwargs[kw] = locals()[kw]

    # build benchmark and add to context for chaining
    print("Building %s with %s" % (
        nengo_dl.utils.function_name(benchmark, sanitize=False), kwargs))
    obj["net"] = benchmark(**kwargs)


@main.command()
@click.pass_obj
@click.option("--train/--no-train", default=False,
              help="Whether to profile training (as opposed to running) "
                   "the network")
@click.option("--n_steps", default=150,
              help="Number of steps for which to run the simulation")
@click.option("--batch_size", default=1,
              help="Number of inputs to the model")
@click.option("--device", default="/gpu:0",
              help="TensorFlow device on which to run the simulation")
@click.option("--unroll", default=25,
              help="Number of steps for which to unroll the simulation")
@click.option("--time-only", is_flag=True, default=False,
              help="Only count total time, rather than profiling internals")
def profile(obj, train, n_steps, batch_size, device, unroll, time_only):
    """Runs profiling on a network (call after 'build')"""

    if "net" not in obj:
        raise ValueError("Must call `build` before `profile`")

    obj["time"] = run_profile(
        obj["net"], do_profile=not time_only, train=train, n_steps=n_steps,
        minibatch_size=batch_size, device=device, unroll_simulation=unroll)
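
# Example command-line invocation (hypothetical; assumes this module is run
# as a script). Because `main` is a chained click group, `build` and
# `profile` can be combined in a single call:
#
#   python benchmarks.py build --benchmark integrator --dimensions 64 \
#       --neurons_per_d 32 profile --n_steps 100 --device /cpu:0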
""" # a_shape = (n_ops, s0, s1, 1) # x_shape = (n_ops, 1, s1, mini) # for matmul we omit the 1 dimensions a_c = tf.placeholder(tf.float64, shape=(None, None, None), name="a_c") x_c = tf.placeholder(tf.float64, shape=(None, None, None), name="b_c") a_d = tf.placeholder(tf.float64, shape=(None, None, None, 1), name="a_d") x_d = tf.placeholder(tf.float64, shape=(None, 1, None, None), name="b_d") c = tf.matmul(a_c, x_c) d = tf.reduce_sum(tf.multiply(a_d, x_d), axis=-2) reps = 100 n_ops_range = [1, 4, 8, 16, 32, 64] mini_range = [1, 16, 32, 64, 128] s0_range = [1, 64, 128, 192, 256] s1_range = [1, 64, 128, 192, 256] matmul_times = np.zeros((len(n_ops_range), len(mini_range), len(s0_range), len(s1_range))) reduce_times = np.zeros_like(matmul_times) params = itertools.product( enumerate(n_ops_range), enumerate(mini_range), enumerate(s0_range), enumerate(s1_range)) with tf.Session() as sess: for (i, n_ops), (j, mini), (k, s0), (l, s1) in params: print(n_ops, mini, s0, s1) a_val = np.random.randn(n_ops, s0, s1, 1) x_val = np.random.randn(n_ops, 1, s1, mini) for r in range(reps + 3): if r == 3: start = time.time() c_val = sess.run(c, feed_dict={a_c: a_val[..., 0], x_c: x_val[:, 0]}) matmul_times[i, j, k, l] = (time.time() - start) / reps for r in range(reps + 3): if r == 3: start = time.time() d_val = sess.run(d, feed_dict={a_d: a_val, x_d: x_val}) reduce_times[i, j, k, l] = (time.time() - start) / reps assert np.allclose(c_val, d_val) fig, ax = plt.subplots(len(n_ops_range), len(mini_range), sharex=True, sharey=True) X, Y = np.meshgrid(s0_range, s1_range) Z = matmul_times - reduce_times v = np.sort(np.concatenate((np.linspace(np.min(Z), np.max(Z), 10), [0]))) for i, n_ops in enumerate(n_ops_range): for j, mini in enumerate(mini_range): cs = ax[i][j].contourf(X, Y, Z[i, j], v) if i == 0: ax[i][j].set_title("mini %d" % mini) if j == 0: ax[i][j].set_ylabel("ops %d" % n_ops) DATA_DIR = os.path.join(os.path.dirname(nengo_dl.__file__), "..", "data") np.savez(os.path.join(DATA_DIR, "matmul_benchmarks"), n_ops_range, mini_range, s0_range, s1_range, Z) fig.subplots_adjust(right=0.8) cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7]) fig.colorbar(cs, cax=cbar_ax) plt.show() @main.command() @click.option("--device", default="/gpu:0", help="TensorFlow device on which to run benchmarks") def performance_samples(device): # pragma: no cover """ Run a brief sample of the benchmarks to check overall performance. This is mainly used to quickly check that there haven't been any unexpected performance regressions. 
""" # TODO: automatically run some basic performance tests during CI default_kwargs = {"n_steps": 1000, "device": device, "unroll_simulation": 25, "progress_bar": False, "do_profile": False} print("cconv + relu") net = cconv(128, 64, nengo.RectifiedLinear()) run_profile(net, minibatch_size=64, **default_kwargs) print("cconv + lif") net = cconv(128, 64, nengo.LIF()) run_profile(net, minibatch_size=64, **default_kwargs) print("integrator training + relu") net = integrator(128, 32, nengo.RectifiedLinear()) run_profile(net, minibatch_size=64, train=True, **default_kwargs) print("integrator training + lif") net = integrator(128, 32, nengo.LIF()) run_profile(net, minibatch_size=64, train=True, **default_kwargs) print("random") net = random_network(128, 64, nengo.RectifiedLinear(), n_ensembles=50, connections_per_ensemble=5, seed=0) run_profile(net, **default_kwargs) print("spaun") net = spaun(1) run_profile(net, **default_kwargs) # example benchmark data # CPU: 4.00GHz Intel Core i7-6700K # GPU: NVIDIA GeForce GTX 980 Ti # TensorFlow version: 1.10.0 # Nengo version: 2.8.0 # NengoDL version: 1.2.0 # cconv + relu # Execution time: 1.0098507404327393 # cconv + lif # Execution time: 2.074916362762451 # integrator training + relu # Execution time: 1.8205187320709229 # integrator training + lif # Execution time: 2.669060707092285 # random # Execution time: 21.686023235321045 # spaun # Execution time: 9.540623426437378 if __name__ == "__main__": main(obj={}) # pragma: no cover