"""Implementation of the Nengo cache object (NCO) protocol.
Nengo cache objects store a Numpy array and some associated, picklable Python
object in a single, uncompressed file. These files are not platform independent
as they are optimized for fast reading and writing, and cached data is not
supposed to be shared across platforms.
The protocol version 0 is as follows:
* A header consisting of:
* 3 bytes with the magic string 'NCO'
* 1 unsigned byte indicating the protocol version
* unsigned long int denoting the start of the Python object data
* unsigned long int denoting the end of the Python object data
* unsigned long int denoting the start of the array data
* unsigned long int denoting the end of the array data
* Potentially some padding bytes.
* The Python object data pickled by the (c)pickle module using the highest
available protocol.
* Potentially some padding bytes.
* The array data in NPY format.
Files are written with padding so that both the Python object data and the
array data start at offsets aligned to 16 bytes.
The Numpy NPY format is documented here:
https://numpy.org/devdocs/reference/generated/numpy.lib.format.html
As of legacy version 1 of the cache, multiple NCO files will be concatenated
into one file. The start and end of each subfile will be stored in a cache
index, but can also be recovered from reading the headers of the NCO files in
order as each one gives the start of the next header (corresponding to the
end of the array data).
"""
import os
import pickle
import struct
import numpy as np
from .cache import byte_align
from ..exceptions import CacheIOError
class Subfile:
    """A file-like object for limiting reads to a subrange of a file.

    This class only supports reading and seeking. Writing is not supported.
    Positions reported by `tell` and accepted by `seek` are relative to
    ``start``; the wrapped file object's own position is moved as a side
    effect, so it should not be repositioned elsewhere while in use.

    Parameters
    ----------
    fileobj : file-like object
        The complete underlying file. Must support ``read``, ``readline``,
        ``seek`` and ``tell``.
    start : int
        Offset of the first readable position in the file.
    end : int
        Offset of the last readable position + 1 in the file.
    """

    def __init__(self, fileobj, start, end):
        self.fileobj = fileobj
        self.start = start
        self.end = end
        # Number of bytes that may still be read before hitting ``end``.
        self.max_size = end - start
        self.fileobj.seek(start)

    def _clamp(self, size):
        """Limit a requested read size to the bytes left in the subrange.

        ``None`` and negative sizes follow the usual file convention of
        "read everything that is left"; they must never be forwarded to the
        underlying file, where they would read past ``end``.
        """
        remaining = max(self.max_size, 0)
        if size is None or size < 0:
            return remaining
        return min(size, remaining)

    def read(self, size=None):
        """Read up to ``size`` bytes, never going past the end of the subrange.

        Parameters
        ----------
        size : int, optional
            Maximum number of bytes to read. ``None`` or a negative value
            reads everything remaining in the subrange.

        Returns
        -------
        The data returned by the underlying file object.
        """
        data = self.fileobj.read(self._clamp(size))
        # Account for what was actually returned; the underlying file may
        # deliver fewer bytes than requested.
        self.max_size -= len(data)
        return data

    def readline(self, size=None):
        """Read one line, never going past the end of the subrange.

        Parameters
        ----------
        size : int, optional
            Maximum number of bytes to read. ``None`` or a negative value
            allows the line to extend to the end of the subrange.

        Returns
        -------
        The line returned by the underlying file object.
        """
        data = self.fileobj.readline(self._clamp(size))
        # The line may be shorter than the limit, so resynchronise with the
        # real file position.
        self.max_size = self.end - self.fileobj.tell()
        return data

    def seek(self, offset, whence=os.SEEK_SET):
        """Move the read position within the subrange.

        Parameters
        ----------
        offset : int
            Offset relative to the position selected by ``whence``.
        whence : int, optional
            ``os.SEEK_SET`` (relative to ``start``), ``os.SEEK_CUR`` (relative
            to the current position) or ``os.SEEK_END`` (relative to ``end``).

        Returns
        -------
        int
            The new position relative to ``start``.

        Raises
        ------
        NotImplementedError
            If ``whence`` is not one of the three supported values.
        """
        if whence == os.SEEK_CUR:
            offset = self.fileobj.tell() + offset
        elif whence == os.SEEK_SET:
            offset = self.start + offset
        elif whence == os.SEEK_END:
            offset = self.end + offset
        else:
            raise NotImplementedError()
        # Targets outside the subrange are clamped to its boundaries.
        offset = max(self.start, min(self.end, offset))
        self.max_size = self.end - offset
        self.fileobj.seek(offset)
        return offset - self.start

    def tell(self):
        """Return the current position relative to ``start``."""
        return self.fileobj.tell() - self.start
# Magic bytes that open every NCO header.
MAGIC_STRING = b"NCO"
# Protocol versions accepted when reading a header.
SUPPORTED_PROTOCOLS = [0]
# Native byte order and alignment ("@"): the magic bytes, one unsigned byte
# for the protocol version, then four unsigned longs holding the start/end
# offsets of the pickled object data and of the array data.
HEADER_FORMAT = "@%dsBLLLL" % len(MAGIC_STRING)
HEADER_SIZE = struct.calcsize(HEADER_FORMAT)
# Byte alignment applied to the start of the pickle and array sections.
ALIGNMENT = 16
def write(fileobj, metadata, array):
    """Serialize a metadata object and an array as one Nengo cache object.

    The header is written last because it records the offsets of both
    payload sections, which are only known once they have been written.
    On return the file position is at the end of the array data.

    Parameters
    ----------
    fileobj : file-like object
        Seekable file object to write the data to. Writing begins at its
        current position.
    metadata : object
        Python object with metadata (will be pickled).
    array : ndarray
        Numpy array with the actual data to store.
    """
    header_pos = fileobj.tell()

    # Pickled metadata goes at the first aligned offset after the header.
    meta_begin = byte_align(header_pos + HEADER_SIZE, ALIGNMENT)
    fileobj.seek(meta_begin)
    pickle.dump(metadata, fileobj, protocol=pickle.HIGHEST_PROTOCOL)
    meta_finish = fileobj.tell()

    # Array data (NPY format) goes at the next aligned offset.
    data_begin = byte_align(meta_finish, ALIGNMENT)
    fileobj.seek(data_begin)
    np.save(fileobj, array)
    data_finish = fileobj.tell()

    # Go back and fill in the header now that all offsets are known.
    offsets = (meta_begin, meta_finish, data_begin, data_finish)
    fileobj.seek(header_pos)
    fileobj.write(struct.pack(HEADER_FORMAT, MAGIC_STRING, 0, *offsets))

    # Leave the position after the array so further objects can be appended.
    fileobj.seek(data_finish)
def read(fileobj):
    """Load one Nengo cache object from the current file position.

    The metadata is restored with ``pickle.load``, so only files from a
    trusted source should be read.

    Parameters
    ----------
    fileobj : file-like object
        The file object to read from, positioned at the start of an NCO
        header.

    Returns
    -------
    metadata, array
        Tuple whose first element is the unpickled metadata object and whose
        second element is the array holding the actual data.

    Raises
    ------
    CacheIOError
        If the magic string does not match or the protocol version is not
        supported.
    """
    raw_header = fileobj.read(HEADER_SIZE)
    fields = struct.unpack(HEADER_FORMAT, raw_header)
    magic, version = fields[:2]

    if magic != MAGIC_STRING:
        raise CacheIOError("Not a Nengo cache object file.")
    if version not in SUPPORTED_PROTOCOLS:
        raise CacheIOError("NCO protocol version {} is not supported.".format(version))

    meta_begin, meta_finish, data_begin, data_finish = fields[2:]

    # Each section is read through a window restricted to its own byte range.
    meta_view = Subfile(fileobj, meta_begin, meta_finish)
    metadata = pickle.load(meta_view)

    data_view = Subfile(fileobj, data_begin, data_finish)
    array = np.load(data_view)

    return metadata, array