sleap_io.io.utils

Miscellaneous utilities for working with different I/O formats.

convert_predictions_to_dataframe(labels)

Convert predictions data to a Pandas dataframe.

Parameters:

    labels (Labels): A Labels object containing predicted instances. Required.

Returns:

    pd.DataFrame: A pandas DataFrame with hierarchical columns. The column
    hierarchy is "video_path", "skeleton_name", "track_name", "node_name",
    and the rows are indexed by frame index.

Raises:

    ValueError: If no frames in the labels object contain predicted instances.

Source code in sleap_io/io/utils.py
def convert_predictions_to_dataframe(labels: Labels) -> pd.DataFrame:
    """Convert predictions data to a Pandas dataframe.

    Args:
        labels: A Labels object containing predicted instances.

    Returns:
        pd.DataFrame: A pandas DataFrame with hierarchical columns.
        The column hierarchy is:
                "video_path",
                "skeleton_name",
                "track_name",
                "node_name",
        and the rows are indexed by frame index.

    Raises:
        ValueError: If no frames in the labels object contain predicted instances.
    """
    # Form pairs of labeled_frames and predicted instances
    labeled_frames = labels.labeled_frames
    all_frame_instance_tuples: Generator[
        tuple[LabeledFrame, PredictedInstance], None, None
    ] = (
        (label_frame, instance)  # type: ignore
        for label_frame in labeled_frames
        for instance in label_frame.predicted_instances
    )

    # Extract the data
    data_list = []
    for labeled_frame, instance in all_frame_instance_tuples:
        # Traverse the nodes of the instance's skeleton
        skeleton = instance.skeleton
        for node in skeleton.nodes:
            row_dict = dict(
                frame_idx=labeled_frame.frame_idx,
                x=instance.points[node].x,
                y=instance.points[node].y,
                score=instance.points[node].score,  # type: ignore[attr-defined]
                node_name=node.name,
                skeleton_name=skeleton.name,
                track_name=instance.track.name if instance.track else "untracked",
                video_path=labeled_frame.video.filename,
            )
            data_list.append(row_dict)

    if not data_list:
        raise ValueError("No predicted instances found in labels object")

    labels_df = pd.DataFrame(data_list)

    # Reformat the data with columns for dict-like hierarchical data access.
    index = [
        "skeleton_name",
        "track_name",
        "node_name",
        "video_path",
        "frame_idx",
    ]

    labels_tidy_df = (
        labels_df.set_index(index)
        .unstack(level=[0, 1, 2, 3])
        .swaplevel(0, -1, axis=1)  # video_path on top; x, y, score on the bottom
        .sort_index(axis=1)  # Better format for columns
        .sort_index(axis=0)  # Sorts by frames
    )

    return labels_tidy_df
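
A minimal usage sketch (the file, video, skeleton, track, and node names below are hypothetical):

import sleap_io as sio
from sleap_io.io.utils import convert_predictions_to_dataframe

labels = sio.load_slp("predictions.slp")  # hypothetical path
df = convert_predictions_to_dataframe(labels)

# Columns form a MultiIndex: (video_path, skeleton_name, track_name, node_name, field).
# Drill down to the x, y, score columns for one node of one track:
head = df["video.mp4"]["skeleton-1"]["track_0"]["head"]
print(head[["x", "y", "score"]].head())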

read_hdf5_attrs(filename, dataset='/', attribute=None)

Read attributes from an HDF5 dataset.

Parameters:

    filename (str): Path to an HDF5 file. Required.
    dataset (str): Path to a dataset or group from which attributes will be
        read. Defaults to "/".
    attribute (Optional[str]): If specified, the attribute name to read. If
        None (the default), all attributes for the dataset will be returned.

Returns:

    Union[Any, dict[str, Any]]: The attributes in a dictionary, or the
    attribute value if attribute was provided.

Source code in sleap_io/io/utils.py
def read_hdf5_attrs(
    filename: str, dataset: str = "/", attribute: Optional[str] = None
) -> Union[Any, dict[str, Any]]:
    """Read attributes from an HDF5 dataset.

    Args:
        filename: Path to an HDF5 file.
        dataset: Path to a dataset or group from which attributes will be read.
        attribute: If specified, the attribute name to read. If `None` (the default),
            all attributes for the dataset will be returned.

    Returns:
        The attributes in a dictionary, or the attribute field if `attribute` was
        provided.
    """
    with h5py.File(filename, "r") as f:
        ds = f[dataset]
        if attribute is None:
            data = dict(ds.attrs)
        else:
            data = ds.attrs[attribute]
    return data
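
A short example (the file, dataset, and attribute names are hypothetical):

from sleap_io.io.utils import read_hdf5_attrs

# Read all attributes on the root group as a dict.
all_attrs = read_hdf5_attrs("example.h5")

# Read a single attribute from a specific dataset.
version = read_hdf5_attrs("example.h5", dataset="metadata", attribute="version")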

read_hdf5_dataset(filename, dataset)

Read data from an HDF5 file.

Parameters:

    filename (str): Path to an HDF5 file. Required.
    dataset (str): Path to a dataset. Required.

Returns:

    np.ndarray: The data as an array.

Source code in sleap_io/io/utils.py
def read_hdf5_dataset(filename: str, dataset: str) -> np.ndarray:
    """Read data from an HDF5 file.

    Args:
        filename: Path to an HDF5 file.
        dataset: Path to a dataset.

    Returns:
        The data as an array.
    """
    with h5py.File(filename, "r") as f:
        data = f[dataset][()]
    return data
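
For example (the file and dataset names are hypothetical):

from sleap_io.io.utils import read_hdf5_dataset

# Load a single dataset into memory as a NumPy array.
tracks = read_hdf5_dataset("example.h5", "tracks")
print(tracks.shape, tracks.dtype)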

read_hdf5_group(filename, group='/')

Read an entire group from an HDF5 file.

Parameters:

    filename (str): Path to an HDF5 file. Required.
    group (str): Path to a group within the HDF5 file. Defaults to "/" (read
        the entire file).

Returns:

    dict[str, np.ndarray]: A flat dictionary with keys corresponding to
    dataset paths and values corresponding to the datasets as arrays.

Source code in sleap_io/io/utils.py
def read_hdf5_group(filename: str, group: str = "/") -> dict[str, np.ndarray]:
    """Read an entire group from an HDF5 file.

    Args:
        filename: Path to an HDF5 file.
        group: Path to a group within the HDF5 file. Defaults to "/" (read the entire
            file).

    Returns:
        A flat dictionary with keys corresponding to dataset paths and values
        corresponding to the datasets as arrays.
    """
    data = {}

    def read_datasets(k, v):
        if isinstance(v, h5py.Dataset):
            data[v.name] = v[()]

    with h5py.File(filename, "r") as f:
        f[group].visititems(read_datasets)

    return data
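
A short example (the file and group names are hypothetical); note that the keys of the returned dictionary are absolute dataset paths:

from sleap_io.io.utils import read_hdf5_group

# Read every dataset under a group into a flat {path: array} dict.
data = read_hdf5_group("example.h5", group="/poses")
for path, array in data.items():
    print(path, array.shape)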

write_hdf5_attrs(filename, dataset, attributes)

Write attributes to an HDF5 dataset.

Parameters:

    filename (str): Path to an HDF5 file. Required.
    dataset (str): Path to a dataset or group to which attributes will be
        written. Required.
    attributes (dict[str, Any]): The attributes in a dictionary with the keys
        as the attribute names. Required.
Source code in sleap_io/io/utils.py
def write_hdf5_attrs(filename: str, dataset: str, attributes: dict[str, Any]):
    """Write attributes to an HDF5 dataset.

    Args:
        filename: Path to an HDF5 file.
        dataset: Path to a dataset or group to which attributes will be written.
        attributes: The attributes in a dictionary with the keys as the attribute names.
    """

    def _overwrite_hdf5_attr(
        group_or_dataset: Union[h5py.Group, h5py.Dataset], attr_name: str, data: Any
    ):
        """Overwrite attribute for group or dataset in HDF5 file.

        Args:
            group_or_dataset: Group or dataset object in the HDF5 file.
            attr_name: Name of attribute.
            data: Data to write to attribute.
        """
        try:
            del group_or_dataset.attrs[attr_name]
        except KeyError:
            pass
        group_or_dataset.attrs.create(attr_name, data)

    with h5py.File(filename, "a") as f:  # "a": read/write if exists, create otherwise
        ds = f[dataset]
        for attr_name, attr_value in attributes.items():
            _overwrite_hdf5_attr(ds, attr_name, attr_value)
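
For example (the file name and attribute values are hypothetical):

from sleap_io.io.utils import write_hdf5_attrs

# Attach (or overwrite) attributes on the root group.
write_hdf5_attrs("example.h5", "/", {"version": "1.0", "fps": 30})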

write_hdf5_dataset(filename, dataset, data)

Write data to an HDF5 file.

Parameters:

    filename (str): Path to an HDF5 file. Required.
    dataset (str): Path to a dataset. Required.
    data (np.ndarray): Data to write to the dataset. Required.
Source code in sleap_io/io/utils.py
def write_hdf5_dataset(filename: str, dataset: str, data: np.ndarray):
    """Write data to an HDF5 file.

    Args:
        filename: Path to an HDF5 file.
        dataset: Path to a dataset.
        data: Data to write to dataset.
    """
    with h5py.File(filename, "a") as f:  # "a": read/write if exists, create otherwise
        _overwrite_hdf5_dataset(f, dataset, data)
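
For example (the file and dataset names are hypothetical):

import numpy as np
from sleap_io.io.utils import write_hdf5_dataset

# Write an array, overwriting the dataset if it already exists.
write_hdf5_dataset("example.h5", "tracks", np.zeros((10, 2)))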

write_hdf5_group(filename, data)

Write an entire group to an HDF5 file.

Parameters:

    filename (str): Path to an HDF5 file. Required.
    data (dict[str, np.ndarray]): A dictionary with keys corresponding to
        dataset/group paths and values corresponding to either nested
        dictionaries (sub-groups) or the datasets as arrays. Required.
Source code in sleap_io/io/utils.py
def write_hdf5_group(filename: str, data: dict[str, np.ndarray]):
    """Write an entire group to an HDF5 file.

    Args:
        filename: Path to an HDF5 file.
        data: A dictionary with keys corresponding to dataset/group paths and values
            corresponding to either nested dictionaries (sub-groups) or the datasets
            as arrays.
    """

    def overwrite_hdf5_group(
        file_or_group: Union[h5py.File, h5py.Group], group_name: str
    ) -> h5py.Group:
        """Overwrite group in HDF5 file.

        Args:
            file_or_group: An HDF5 file handle or parent group object.
            group_name: Path to a group.

        Returns:
            group: (Sub-)group under the specified file or parent group.
        """
        try:
            del file_or_group[group_name]
        except KeyError:
            pass
        group = file_or_group.create_group(group_name)
        return group

    def write_group(parent_group, data_to_write):
        for name, dataset_or_group in data_to_write.items():
            if isinstance(dataset_or_group, dict):
                # Create (sub-)group under parent group (top level being the file)
                group = overwrite_hdf5_group(parent_group, name)
                write_group(group, dataset_or_group)  # Recall with new parent
            else:
                # Create dataset if dataset_or_group is a dataset
                _overwrite_hdf5_dataset(
                    f=parent_group, dataset=name, data=dataset_or_group
                )

    with h5py.File(filename, "a") as f:  # "a": read/write if exists, create otherwise
        write_group(f, data)
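
A short example (all names and values here are hypothetical); nested dictionaries become groups, and array values become datasets:

import numpy as np
from sleap_io.io.utils import write_hdf5_group

data = {
    "poses": {
        "tracks": np.zeros((10, 2)),
        "scores": np.ones(10),
    },
    "n_frames": np.array(10),
}
write_hdf5_group("example.h5", data)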