# Source code for nengo.utils.nco

"""Implementation of the Nengo cache object (NCO) protocol.

Nengo cache objects store a Numpy array and some associated, picklable Python
object in a single, uncompressed file. These files are not platform independent
as they are optimized for fast reading and writing, and cached data is not
supposed to be shared across platforms.

The protocol version 0 is as follows:

* A header consisting of:
    * 3 bytes with the magic string 'NCO'
    * 1 unsigned byte indicating the protocol version
    * unsigned long int denoting the start of the Python object data
    * unsigned long int denoting the end of the Python object data
    * unsigned long int denoting the start of the array data
    * unsigned long int denoting the end of the array data
* Potentially some padding bytes.
* The Python object data pickled by the (c)pickle module using the highest
  available protocol.
* Potentially some padding bytes.
* The array data in NPY format.

Files are written with padding so that both the Python object data and the
array data are aligned to 16-byte boundaries.

The Numpy NPY format is documented here:
https://numpy.org/devdocs/reference/generated/numpy.lib.format.html

As of legacy version 1 of the cache, multiple NCO files will be concatenated
into one file. The start and end of each subfile will be stored in a cache
index, but can also be recovered from reading the headers of the NCO files in
order as each one gives the start of the next header (corresponding to the
end of the array data).
"""

import os
import pickle
import struct

import numpy as np

from .cache import byte_align
from ..exceptions import CacheIOError


class Subfile:
    """A file-like object for limiting reads to a subrange of a file.

    This class only supports reading and seeking. Writing is not supported.
    Positions reported by `tell` and accepted by `seek` are relative to
    ``start``. The wrapper shares the position of the underlying file object,
    so the underlying file must not be repositioned by other code while the
    subfile is in use.

    Parameters
    ----------
    fileobj : file-like object
        The complete file.
    start : int
        Offset of the first readable position in the file.
    end : int
        Offset of the last readable position + 1 in the file.
    """

    def __init__(self, fileobj, start, end):
        self.fileobj = fileobj
        self.start = start
        self.end = end
        # Number of bytes that may still be read before hitting ``end``.
        # Clamped so that an inverted range (end < start) behaves like an
        # empty subfile instead of producing a negative read size.
        self.max_size = max(0, end - start)

        self.fileobj.seek(start)

    def _clamp_size(self, size):
        """Limit a requested read size to the bytes left in the subfile.

        ``None`` and negative values follow the usual file convention of
        "read as much as possible", which here means up to ``end``.
        """
        if size is None or size < 0:
            return self.max_size
        return min(size, self.max_size)

    def read(self, size=None):
        """Read at most ``size`` bytes without going past ``end``.

        Parameters
        ----------
        size : int, optional
            Maximum number of bytes to read. ``None`` or a negative value
            reads everything up to the end of the subfile.
        """
        data = self.fileobj.read(self._clamp_size(size))
        # Account for what was actually consumed; the underlying file may
        # return fewer bytes than requested.
        self.max_size -= len(data)
        return data

    def readline(self, size=None):
        """Read one line, never reading past ``end``.

        Parameters
        ----------
        size : int, optional
            Maximum number of bytes to read. ``None`` or a negative value
            allows the line to extend up to the end of the subfile.
        """
        data = self.fileobj.readline(self._clamp_size(size))
        # A line may be shorter than the size limit, so resynchronize with
        # the real file position.
        self.max_size = max(0, self.end - self.fileobj.tell())
        return data

    def seek(self, offset, whence=os.SEEK_SET):
        """Move the read position within the subfile.

        The target position is clamped to the ``[start, end]`` range.

        Parameters
        ----------
        offset : int
            Offset relative to the position selected by ``whence``.
        whence : int, optional
            One of ``os.SEEK_SET`` (relative to ``start``), ``os.SEEK_CUR``
            (relative to the current position) or ``os.SEEK_END`` (relative
            to ``end``).

        Returns
        -------
        int
            The new position relative to ``start``.

        Raises
        ------
        NotImplementedError
            If ``whence`` is not one of the supported values.
        """
        if whence == os.SEEK_CUR:
            offset = self.fileobj.tell() + offset
        elif whence == os.SEEK_SET:
            offset = self.start + offset
        elif whence == os.SEEK_END:
            offset = self.end + offset
        else:
            raise NotImplementedError()
        offset = max(self.start, min(self.end, offset))
        self.max_size = max(0, self.end - offset)
        self.fileobj.seek(offset)
        return offset - self.start

    def tell(self):
        """Return the current position relative to ``start``."""
        return self.fileobj.tell() - self.start


# Constants describing the NCO header layout (see the module docstring).
# The "@" prefix selects native byte order, sizes and alignment in the struct
# module, so HEADER_SIZE (and the width of the four unsigned long offsets)
# depends on the platform the file is written on.
MAGIC_STRING = b"NCO"
SUPPORTED_PROTOCOLS = [0]
HEADER_FORMAT = "@%dsBLLLL" % len(MAGIC_STRING)
HEADER_SIZE = struct.Struct(HEADER_FORMAT).size
ALIGNMENT = 16  # byte alignment passed to byte_align for both payloads


def write(fileobj, metadata, array):
    """Serialize a metadata object and an array as one Nengo cache object.

    The object is written starting at the current position of ``fileobj``.
    On return, the file position is at the end of the array data.

    Parameters
    ----------
    fileobj : file-like object
        Seekable, writable binary file object that receives the data.
    metadata : object
        Picklable Python object holding the metadata.
    array : ndarray
        Numpy array with the actual data; stored with ``np.save``.
    """
    header_offset = fileobj.tell()

    # The header records where each payload starts and ends, which is only
    # known once the payloads have been written. Skip over the header space
    # first and fill it in afterwards.
    meta_begin = byte_align(header_offset + HEADER_SIZE, ALIGNMENT)
    fileobj.seek(meta_begin)
    pickle.dump(metadata, fileobj, protocol=pickle.HIGHEST_PROTOCOL)
    meta_stop = fileobj.tell()

    data_begin = byte_align(meta_stop, ALIGNMENT)
    fileobj.seek(data_begin)
    np.save(fileobj, array)
    data_stop = fileobj.tell()

    # Go back and write the header (protocol version 0), then restore the
    # position to the end of the object so further writes append after it.
    fileobj.seek(header_offset)
    fileobj.write(
        struct.pack(
            HEADER_FORMAT,
            MAGIC_STRING,
            0,
            meta_begin,
            meta_stop,
            data_begin,
            data_stop,
        )
    )
    fileobj.seek(data_stop)


def read(fileobj):
    """Load a Nengo cache object from the current position of a file.

    Parameters
    ----------
    fileobj : file-like object
        Seekable binary file object positioned at the start of an NCO header.

    Returns
    -------
    metadata, array
        Tuple of the unpickled metadata object and the Numpy array holding
        the actual data.

    Raises
    ------
    CacheIOError
        If the magic string does not match or the protocol version is not
        listed in ``SUPPORTED_PROTOCOLS``.

    Notes
    -----
    A header shorter than ``HEADER_SIZE`` (e.g., a truncated file) is not
    handled here and surfaces as the ``struct.error`` raised by
    ``struct.unpack``.

    The metadata is loaded with ``pickle.load``, so this function must only be
    used on trusted files.
    """
    raw_header = fileobj.read(HEADER_SIZE)
    fields = struct.unpack(HEADER_FORMAT, raw_header)
    magic, version = fields[:2]
    meta_span = fields[2:4]  # (start, end) offsets of the pickled metadata
    data_span = fields[4:6]  # (start, end) offsets of the NPY array data

    if magic != MAGIC_STRING:
        raise CacheIOError("Not a Nengo cache object file.")
    if version not in SUPPORTED_PROTOCOLS:
        raise CacheIOError("NCO protocol version {} is not supported.".format(version))

    # Each Subfile seeks to its start offset on construction, so the metadata
    # must be fully loaded before the array subfile is created.
    metadata = pickle.load(Subfile(fileobj, *meta_span))
    array = np.load(Subfile(fileobj, *data_span))
    return metadata, array