""" Sets the default options for HDF5 data.

Author: Nils Geib
"""
import numpy as np
from types import SimpleNamespace as SN

class HDF5Options:
    """Select HDF5 dataset-creation options based on the size of the data.

    Native HDF5 compression will actually increase the file size for small
    arrays (< 300 bytes). This class therefore keeps two option sets, one
    with compression and one without, and picks between them in
    ``__call__`` depending on the number of bytes in the array.

    It can be subclassed to support more sophisticated selection
    strategies.

    Attributes
    ----------
    compression_threshold : int
        Size in bytes. Arrays strictly larger than this get the
        ``compressed_dataset`` options, all others the ``dataset`` options.
    compressed_dataset : types.SimpleNamespace
        Keyword options returned for arrays above the threshold.
    dataset : types.SimpleNamespace
        Keyword options returned for arrays at or below the threshold.
    libver, driver, kwds, encoding
        Stored on the instance but not read inside this class.
        NOTE(review): presumably consumed by the code that opens the HDF5
        file and encodes strings -- confirm against the callers.
    """

    def __init__(self):
        """Initialize the options with their default values."""
        self.compression_threshold = 300  # bytes
        self.libver = 'latest'
        self.driver = None
        self.kwds = dict()
        self.encoding = 'utf-8'
        # Options handed out for arrays larger than the threshold.
        self.compressed_dataset = SN(
            compression='gzip',
            chunks=True,
            fletcher32=True,
            shuffle=True,
            compression_opts=9,
        )
        # Options handed out for small arrays: everything switched off.
        self.dataset = SN(
            compression=None,
            chunks=None,
            fletcher32=False,
            shuffle=False,
        )

    def copy(self):
        """Return an independent copy of this options object.

        The copy has the same type as ``self``, so subclasses (and any
        extra attributes they define) survive copying. The mutable
        containers ``kwds``, ``compressed_dataset`` and ``dataset`` are
        duplicated, so changing them on the copy leaves the original
        untouched. Any other attribute is copied by reference.

        Returns
        -------
        HDF5Options
            A new instance of ``type(self)``.
        """
        cls = type(self)
        # Bypass __init__: a subclass may require constructor arguments,
        # and every attribute is taken over from ``self`` anyway.
        ret = cls.__new__(cls)
        ret.__dict__.update(self.__dict__)
        # dict(mapping) also works for non-string keys, unlike dict(**m).
        ret.kwds = dict(self.kwds)
        ret.compressed_dataset = SN(**vars(self.compressed_dataset))
        ret.dataset = SN(**vars(self.dataset))
        return ret

    def __call__(self, arr):
        """Return the dataset creation options for an array.

        Parameters
        ----------
        arr : array_like
            The data to be stored. It is passed through
            ``np.asanyarray`` to determine its size in bytes.

        Returns
        -------
        dict
            A new dictionary holding the entries of ``compressed_dataset``
            if ``arr`` occupies more than ``compression_threshold`` bytes,
            otherwise the entries of ``dataset``.
        """
        arr = np.asanyarray(arr)
        if arr.nbytes > self.compression_threshold:
            selected = self.compressed_dataset
        else:
            selected = self.dataset
        # Hand out a fresh dict: vars() exposes the namespace's live
        # __dict__, so returning it directly would let a caller's
        # modifications silently alter the stored defaults.
        return dict(vars(selected))