Source code for pymodaq_data.h5modules.saving

# -*- coding: utf-8 -*-
"""
Created the 15/11/2022

@author: Sebastien Weber
"""
import copy
import datetime
import enum

from dateutil import parser
from numbers import Number
import os
from pathlib import Path
from typing import Union, Iterable


import numpy as np

from pymodaq_utils.logger import set_logger, get_module_name

from pymodaq_utils import utils
from pymodaq_utils.config import GlobalConfig as Config
from pymodaq_data.data import DataDim, DataToExport, Axis, DataWithAxes
from pymodaq_utils.enums import BaseEnum, enum_checker
from pymodaq_utils.warnings import deprecation_msg


from .backends import (H5Backend, backends_available, SaveType, InvalidSave, InvalidExport,
                       Node, GroupType, InvalidDataDimension, InvalidScanType,
                       GROUP, VLARRAY)
from . import browsing


config = Config()
logger = set_logger(get_module_name(__file__))



[docs]
class FileType(BaseEnum):
    """Enumeration of the kinds of file handled by the saving module.

    The members are plain integer codes; how they are consumed is not visible in this part of
    the module.
    """
    detector = 0
    actuator = 1
    axis = 2
    scan = 3

    


[docs]
class DataType(BaseEnum):
    """Enumeration of the kinds of array that can be stored in a h5 file.

    ``H5SaverLowLevel.add_array`` stores the *name* of the member (not its value) in the
    ``data_type`` attribute of the array it creates.
    """
    data = 'Data'
    axis = 'Axis'
    live_scan = 'Live'
    external_h5 = 'ExtData'
    strings = 'Strings'
    bkg = 'Bkg'
    data_enlargeable = 'EnlData'
    error = 'ErrorBar'




[docs]
class H5SaverLowLevel(H5Backend):
    """Object containing basic methods in order to structure and interact with a h5file compatible
    with the h5browser

    See Also
    --------
    H5Browser

    Attributes
    ----------
    h5_file: pytables hdf5 file
        object used to save all data and metadata
    h5_file_path: str or Path
        The file path
    """

    def __init__(self, save_type: SaveType = 'scan', backend: str = None):
        """Set up the backend and the bookkeeping attributes of the saver.

        Parameters
        ----------
        save_type: SaveType
            Kind of saver, checked against the SaveType enum
        backend: str
            Name of the h5 backend to use. If None, the first element of the
            ('data', 'data_saving', 'backend') configuration entry is used.
        """
        selected_backend = backend
        if selected_backend is None:
            selected_backend = config('data', 'data_saving', 'backend')[0]
        H5Backend.__init__(self, selected_backend)

        self.save_type = enum_checker(SaveType, save_type)

        # No file is attached until init_file is called
        self.h5_file_path = None
        self.h5_file_name = None
        self.file_loaded = False

        # Node bookkeeping: the raw group is a plain path until init_file replaces it with a group node
        self._current_group = None
        self._raw_group: Union[GROUP, str] = '/RawData'
        self._logger_array = None

        # Counters used by set_swmr_flush_interval / tick_flush
        self._flush_interval = 0
        self._write_count = 0

        # The configured fill value is either the literal 'nan' or something float() can parse
        configured_fill = config('data', 'data_saving', 'data_type', 'fill_value')[0]
        if configured_fill == 'nan':
            fill = np.nan
        else:
            fill = float(configured_fill)
        self.fill_value: float = fill


[docs]
    @classmethod
    def from_file(cls, path: Union[Path, str], save_type: SaveType = 'scan',
                  new_file: bool = False, metadata: dict = None) -> 'H5SaverLowLevel':
        """Build a saver and attach it to a file in one call.

        Alternate constructor: instantiates the class with ``save_type`` and then calls
        :meth:`init_file` on the given path.

        Parameters
        ----------
        path: Path or str
            Full path to the HDF5 file, converted to a Path before being handed to init_file.
        save_type: SaveType
            Forwarded to the constructor (default ``'scan'``).
        new_file: bool
            Forwarded to init_file: if ``True`` a new file is created, otherwise an existing file
            is opened for appending.
        metadata: dict or None
            Forwarded to init_file: extra attributes written to the raw-data group on creation.

        Returns
        -------
        H5SaverLowLevel
            The instance on which init_file has been called.
        """
        instance = cls(save_type)
        target = Path(path)
        instance.init_file(file_name=target, new_file=new_file, metadata=metadata)
        return instance



[docs]
    def set_swmr_flush_interval(self, interval: int):
        """Configure the flushing period used by :meth:`tick_flush`.

        The write counter is restarted from zero each time the interval is changed.

        Parameters
        ----------
        interval: int
            0 = flush only at end, N = every N writes
        """
        self._flush_interval, self._write_count = interval, 0



[docs]
    def tick_flush(self):
        """Count one write and flush when the configured interval is reached.

        Does nothing (the counter is not even incremented) when SWMR is not active or when the
        flush interval is 0 or negative. Meant to be called by data savers after each logical
        data write.
        """
        if not self.is_swmr_active:
            return
        if self._flush_interval <= 0:
            return

        self._write_count = self._write_count + 1
        interval_reached = self._write_count % self._flush_interval == 0
        if interval_reached:
            self.flush()


    @property
    def raw_group(self):
        """The base group of the file.

        It is the string '/RawData' until :meth:`init_file` replaces it with the group node it
        gets or creates.
        """
        return self._raw_group

    @property
    def h5_file(self):
        """The underlying file object, read from the ``_h5file`` attribute.

        NOTE(review): ``_h5file`` is not assigned in this class, presumably it is managed by the
        H5Backend parent class -- confirm.
        """
        return self._h5file


[docs]
    def init_file(self, file_name: Path, raw_group_name='RawData', new_file=False,
                  metadata: dict = None, swmr_mode: bool = False):
        """Open (or create) a h5 file and prepare its base group and logger array.

        If the suffix of ``file_name`` does not contain 'h5', the file name is replaced by its
        stem followed by '.h5'. Any file already opened by this object is closed first.

        Parameters
        ----------
        file_name: Path
            a complete Path pointing to a h5 file. Anything that is not a Path instance is ignored
            and nothing is opened.
        raw_group_name: str
            Base node name
        new_file: bool
            If True create a new file (opened in 'w' mode), otherwise append to a potential
            existing one. Forced to True if the file does not exist, or if ``swmr_mode`` is
            requested on an existing file that is not flagged as SWMR compatible.
        metadata: dict
            A dictionary to be saved as attributes of the base group (only for a new file)
        swmr_mode: bool
            If True, prepare the file for SWMR (h5py backend only)

        Returns
        -------
        bool
            True if new file has been created, False otherwise (including when ``file_name`` is
            not a valid Path and nothing has been opened)
        """
        datetime_now = datetime.datetime.now()

        if file_name is None or not isinstance(file_name, Path):
            # Nothing to open: leave the current state untouched
            return False

        if 'h5' not in file_name.suffix:
            self.h5_file_name = file_name.stem + ".h5"
        else:
            self.h5_file_name = file_name.name
        self.h5_file_path = file_name.parent
        fullpath = self.h5_file_path.joinpath(self.h5_file_name)

        if not fullpath.is_file():
            new_file = True
        elif swmr_mode and not new_file and self.is_swmr_capable:
            # SWMR requires the file to have been created with libver='latest'
            # (superblock v3+). Check for the marker attribute.
            try:
                with self.h5_library.File(str(fullpath), 'r') as tmp:
                    if not tmp.attrs.get('swmr_compatible', False):
                        logger.info('Existing file is not SWMR-compatible, '
                                    'creating a new file')
                        new_file = True
            except Exception as err:
                # Best effort: the existing file could not be inspected, fall back to a new file
                # but leave a trace of the reason since the existing file will be overwritten
                logger.warning(f'Could not check the SWMR compatibility of {fullpath} ({err}), '
                               f'creating a new file')
                new_file = True

        self.close_file()
        self.open_file(fullpath, 'w' if new_file else 'a',
                       title='PyMoDAQ file', swmr_mode=swmr_mode)

        self._raw_group = self.get_set_group(self.root(), raw_group_name, title='Data from PyMoDAQ modules')
        self.get_set_logger(self._raw_group)

        if new_file:
            self._raw_group.attrs['type'] = self.save_type.name  # first possibility to set a node attribute
            self.root().set_attr('file', self.h5_file_name)  # second possibility

            self.set_attr(self.root(), 'date', datetime_now.date().isoformat())
            self.set_attr(self.root(), 'time', datetime_now.time().isoformat())

            if metadata is not None:
                for metadata_key in metadata:
                    self._raw_group.attrs[metadata_key] = metadata[metadata_key]

        return bool(new_file)



[docs]
    def save_file(self, filename=None):
        """Save the file under another name using the parent class ``save_file_as`` method.

        Nothing is done if ``filename`` is neither a str nor a Path, or if it is an empty string.

        Parameters
        ----------
        filename: str or Path
            Destination handed over, unchanged, to ``save_file_as``
        """
        # `and` binds tighter than `or`: the type check is grouped explicitly so that the
        # emptiness test applies to strings as well as to Path objects
        if not isinstance(filename, (str, Path)):
            return
        if str(filename) == '':
            return
        super().save_file_as(filename)



[docs]
    def get_set_logger(self, where: Node = None) -> VLARRAY:
        """Retrieve, or create if absent, the enlargeable array named 'Logger' used to store logs

        The array is also stored in the ``_logger_array`` attribute so that :meth:`add_log` can
        append to it.

        Parameters
        ----------
        where: node
               location within the tree where to save or retrieve the array, the raw group
               if None

        Returns
        -------
        vlarray
            enlargeable array accepting strings as elements
        """
        location = self.raw_group if where is None else where
        if isinstance(location, Node):
            location = location.node

        array_name = 'Logger'
        if array_name in list(self.get_children(location)):
            # the logger node already exists: reuse it
            self._logger_array = self.get_node(location, name=array_name)
        else:
            self._logger_array = self.add_string_array(location, array_name)
            self._logger_array.attrs['type'] = 'log'
        return self._logger_array



[docs]
    def add_log(self, msg):
        """Append a message to the logger array.

        The logger array is the one stored by :meth:`get_set_logger`; it is None until that
        method (called by :meth:`init_file`) has been run.

        Parameters
        ----------
        msg:
            The element to append to the logger array
        """
        self._logger_array.append(msg)



[docs]
    def add_string_array(self, where, name, title='', metadata=dict([])):
        """Create a variable length array of strings and set its attributes

        Parameters
        ----------
        where: node or str
            location within the tree where to create the array
        name: str
            name of the array
        title: str
            the title given to the array
        metadata: dict
            extra attributes to set on the array, written after the default ones

        Returns
        -------
        The array returned by ``create_vlarray`` with its 'shape' attribute set to (0,) and its
        'data_type' attribute set to 'strings'
        """
        string_array = self.create_vlarray(where, name, dtype='string', title=title)

        default_attributes = (('shape', (0,)), ('data_type', 'strings'))
        for attr_name, attr_value in default_attributes:
            string_array.attrs[attr_name] = attr_value

        for key in metadata:
            string_array.attrs[key] = metadata[key]
        return string_array

    

[docs]
    def add_array(self, where: Union[GROUP, str], name: str, data_type: DataType, array_to_save: np.ndarray = None,
                  data_shape: tuple = None, array_type: np.dtype = None, fill_value=None, data_dimension: DataDim = None,
                  scan_shape: tuple = tuple([]), add_scan_dim=False, enlargeable: bool = False,
                  title: str = '', metadata=dict([])):
        """Create an array node in the hdf5 file and tag it with its metadata

        Parameters
        ----------
        where: GROUP
            node where to save the array
        name: str
            name of the array in the hdf5 file (capitalized before use)
        data_type: DataType
            mandatory so that the h5Browser can interpret correctly the array, its name is saved
            in the 'data_type' attribute
        array_to_save: ndarray or None
            data to be saved in the array. If None, array_type and data_shape should be specified
            in order to init correctly the memory
        data_shape: Iterable
            the shape of the array to save, mandatory if array_to_save is None
        array_type: np.dtype or numpy types
            e.g. np.float64, np.int32 ... If None, the dtype of array_to_save is used, or the
            configured default type if array_to_save is None as well
        fill_value: float or int
            value used to fill the array created when add_scan_dim is True and array_to_save is
            None. If None, the ``fill_value`` attribute of the saver is used
        data_dimension: DataDim
            The data's dimension, its name is saved in the 'data_dimension' attribute
        scan_shape: Iterable
            the shape of the scan dimensions
        add_scan_dim: bool
            if True (and enlargeable is False), the scan shape is prepended to the data shape,
            except when data_shape is (1,) where the scan shape is used alone. In that case the
            array is usually pre-filled and further populated
        enlargeable: bool
            if False, data are saved as a CARRAY, otherwise as a EARRAY (for ragged data, see
            add_string_array)
        title: str
            the title attribute of the array node
        metadata: dict
            dictionary whose items will be saved as the array attributes

        Returns
        -------
        array (CARRAY or EARRAY)

        See Also
        --------
        add_data, add_string_array
        """
        # Element type: explicit argument first, then the data's own dtype, then the configured default
        if array_type is None:
            if array_to_save is not None:
                array_type = array_to_save.dtype
            else:
                array_type = config('data', 'data_saving', 'data_type', 'dynamic')[0]

        data_type = enum_checker(DataType, data_type)
        data_dimension = enum_checker(DataDim, data_dimension)

        if not enlargeable:
            if add_scan_dim:
                # pre-filled array spanning the whole scan
                full_shape = list(scan_shape[:])
                scalar_data = len(data_shape) == 1 and data_shape[0] == 1
                if not scalar_data:  # scalars do not add any dimension to the scan shape
                    full_shape.extend(data_shape)
                if array_to_save is None:
                    if fill_value is None:
                        fill_value = self.fill_value
                    array_to_save = np.full(full_shape, fill_value, dtype=np.dtype(array_type))
            array = self.create_carray(where, utils.capitalize(name), obj=array_to_save, title=title)
        else:
            array = self.create_earray(where, utils.capitalize(name), dtype=np.dtype(array_type),
                                       data_shape=data_shape, title=title)

        self.set_attr(array, 'data_type', data_type.name)
        self.set_attr(array, 'data_dimension', data_dimension.name)
        for key in metadata:
            self.set_attr(array, key, metadata[key])
        return array



[docs]
    def get_set_group(self, where, name, title='', **kwargs):
        """Return the group called name under where, as given by the parent class get_set_group

        The group is also remembered in the ``_current_group`` attribute.
        """
        group = super().get_set_group(where, name, title, **kwargs)
        self._current_group = group
        return group



[docs]
    def get_groups(self, where: Union[str, GROUP], group_type: Union[str, GroupType, BaseEnum]):
        """Get all the direct children of a Group whose 'type' attribute matches a given type

        The comparison is case insensitive. Children without a 'type' attribute are skipped.

        Parameters
        ----------
        where: str or GROUP
            the parent node
        group_type: str or enum
            the type to look for, the name of the member is used if an enum is given

        Returns
        -------
        list
            the matching nodes, in the order given by get_children
        """
        wanted = group_type.name if isinstance(group_type, enum.Enum) else group_type
        # lazily resolve each child so that nodes are fetched and tested one after the other
        children = (self.get_node(where, child_name) for child_name in list(self.get_children(where)))
        return [child for child in children
                if 'type' in child.attrs and child.attrs['type'].lower() == wanted.lower()]



[docs]
    def get_last_group(self, where: GROUP, group_type: Union[str, GroupType, enum.Enum]):
        """Get the last of the groups returned by get_groups, or None if there is none"""
        matching = self.get_groups(where, group_type)
        if len(matching) == 0:
            return None
        return matching[-1]



[docs]
    def get_node_from_attribute_match(self, where, attr_name, attr_value):
        """Get the first walked Node, starting from a given node (Group), having a given attribute value

        Returns None if no node has an attribute attr_name equal to attr_value.
        """
        matches = (candidate for candidate in self.walk_nodes(where)
                   if attr_name in candidate.attrs and candidate.attrs[attr_name] == attr_value)
        return next(matches, None)



[docs]
    def get_node_from_title(self, where, title: str):
        """Get a Node starting from a given node (Group) matching the given title

        Shortcut for :meth:`get_node_from_attribute_match` applied to the 'TITLE' attribute.

        Parameters
        ----------
        where: node or str
            the node from which the search starts
        title: str
            the value of the 'TITLE' attribute to look for
        """
        return self.get_node_from_attribute_match(where, 'TITLE', title)



[docs]
    def add_data_group(self, where, data_dim: DataDim, title='', settings_as_xml='', metadata=None,
                       group_name: str = None):
        """Creates a group node at given location in the tree

        Parameters
        ----------
        where: group node
               where to create data group
        data_dim: DataDim
            the dimensionality of the data group
        title: str, optional
               a title for this node, will be saved as metadata
        settings_as_xml: str, optional
                         XML string created from a Parameter object to be saved as metadata under
                         the 'settings' key
        metadata: dict, optional
                  will be saved as a new metadata attribute with name: key and value: dict value.
                  The dictionary is copied, it is not modified by this method
        group_name: str, optional
            the name of the group to create if None, the name of the DataDim enum is used (default)

        Returns
        -------
        group: group node

        See Also
        --------
        :py:meth:`add_group`
        """
        data_dim = enum_checker(DataDim, data_dim)
        if group_name is None:
            group_name = data_dim.name

        # Work on a copy: adding the settings must not leak into the caller's dictionary
        metadata = {} if metadata is None else dict(metadata)
        metadata['settings'] = settings_as_xml

        return self.add_group(group_name, GroupType.data, where, title, metadata)



[docs]
    def add_incremental_group(self, group_type: Union[str, GroupType, enum.Enum], where, title='', settings_as_xml='',
                              metadata=None):
        """
        Add a node in the h5 file tree of the group type with an increment in the given name

        The new group is named after the capitalized group type followed by an index (at least
        three digits) one above the highest index already used by the children of ``where``
        named the same way. Only children whose name is exactly the capitalized group type
        followed by digits are taken into account.

        Parameters
        ----------
        group_type: str or GroupType enum
            one of the possible values of **group_types**
        where: str or node
            parent node where to create the new group
        title: str
            node title
        settings_as_xml: str
            XML string containing Parameter representation, saved in the 'settings' attribute
        metadata: dict
            extra metadata to be saved with this new group node

        Returns
        -------
        node: newly created group node
        """
        import re  # only needed to parse the index of the existing groups

        if metadata is None:
            metadata = {}
        if isinstance(group_type, enum.Enum):
            group_type = group_type.name

        prefix = utils.capitalize(group_type.lower())
        # Parse the full numeric suffix instead of the last three characters so that unrelated
        # names containing the prefix are ignored and indexes above 999 keep increasing
        numbered_name = re.compile(rf'{re.escape(prefix)}(\d+)')
        used_indexes = []
        for child_name in self.get_children(self.get_node(where)):
            match = numbered_name.fullmatch(child_name)
            if match is not None:
                used_indexes.append(int(match.group(1)))
        ind_group = max(used_indexes, default=-1)

        group = self.get_set_group(where, f'{prefix}{ind_group + 1:03d}', title)
        self.set_attr(group, 'settings', settings_as_xml)
        # channel groups are saved with an empty type
        if group_type.lower() != 'ch':
            self.set_attr(group, 'type', group_type.lower())
        else:
            self.set_attr(group, 'type', '')
        for metadat in metadata:
            self.set_attr(group, metadat, metadata[metadat])
        return group



[docs]
    def add_act_group(self, where, title='', settings_as_xml='', metadata=None):
        """
        Add a new incremental group of type actuator

        See Also
        --------
        add_incremental_group
        """
        extra_attributes = {} if metadata is None else metadata
        return self.add_incremental_group('actuator', where, title, settings_as_xml, extra_attributes)



[docs]
    def add_det_group(self, where, title='', settings_as_xml='', metadata=None):
        """
        Add a new incremental group of type detector

        See Also
        --------
        add_incremental_group
        """
        extra_attributes = {} if metadata is None else metadata
        return self.add_incremental_group('detector', where, title, settings_as_xml, extra_attributes)



[docs]
    def add_generic_group(self, where='/RawData', title='', settings_as_xml='', metadata=None,
                          group_type=GroupType.scan):
        """Add a new group of type given by the input argument group_type

        At creation adds an empty 'description' attribute to be used elsewhere (any
        'description' entry of ``metadata`` is replaced by an empty string).

        Parameters
        ----------
        where: str or node
            parent node where to create the new group
        title: str
            node title
        settings_as_xml: str
            XML string containing Parameter representation
        metadata: dict
            extra metadata to be saved with this new group node. The dictionary is copied, it is
            not modified by this method
        group_type: str or GroupType enum
            the type of the group to create

        Returns
        -------
        node: newly created group node

        See Also
        --------
        add_incremental_group
        """
        # Work on a copy: adding the description must not leak into the caller's dictionary
        metadata = {} if metadata is None else dict(metadata)
        metadata['description'] = ''
        return self.add_incremental_group(group_type, where, title, settings_as_xml, metadata)



[docs]
    def add_scan_group(self, where='/RawData', title='', settings_as_xml='', metadata=None):
        """Add a new group of type scan

        deprecated, use add_generic_group with a group type as GroupType.scan

        Parameters
        ----------
        where: str or node
            parent node where to create the new group
        title: str
            node title
        settings_as_xml: str
            XML string containing Parameter representation
        metadata: dict
            extra metadata to be saved with this new group node. The dictionary is copied, it is
            not modified by this method

        Returns
        -------
        node: newly created group node
        """
        # Hand over a private copy so that the caller's dictionary is left untouched
        metadata = {} if metadata is None else dict(metadata)
        metadata['description'] = ''
        return self.add_generic_group(where, title, settings_as_xml, metadata, group_type=GroupType.scan)



[docs]
    def add_ch_group(self, where, title='', settings_as_xml='', metadata=None):
        """
        Add a new incremental group of type channel ('ch')

        See Also
        --------
        add_incremental_group
        """
        extra_attributes = {} if metadata is None else metadata
        return self.add_incremental_group('ch', where, title, settings_as_xml, extra_attributes)




[docs]
    def add_move_group(self, where, title='', settings_as_xml='', metadata=None):
        """
        Add a new incremental group of type actuator

        See Also
        --------
        add_incremental_group
        """
        extra_attributes = {} if metadata is None else metadata
        return self.add_incremental_group('actuator', where, title, settings_as_xml, extra_attributes)