Skip to content

Coco

sleap_io.io.coco

Handles direct I/O operations for working with COCO-style pose datasets.

COCO-style pose format specification: - JSON annotation files containing images, annotations, and categories - Image directory structure can vary (flat, categorized, nested, multi-source) - Keypoint annotations with coordinates and visibility flags - Support for multiple animal categories with different skeletons - Visibility encoding: binary (0/1) or ternary (0/1/2)

Functions:

Name Description
create_skeleton_from_category

Create a Skeleton object from a COCO category definition.

decode_keypoints

Decode COCO keypoint format to numpy array for Instance creation.

parse_coco_json

Parse COCO annotation JSON file and validate structure.

read_labels

Read COCO-style pose dataset and return a Labels object.

read_labels_set

Read multiple COCO annotation files and return a dictionary of Labels.

resolve_image_path

Resolve image file path handling various directory structures.

create_skeleton_from_category(category)

Create a Skeleton object from a COCO category definition.

Parameters:

Name Type Description Default
category Dict

COCO category dictionary with keypoints and skeleton.

required

Returns:

Type Description
Skeleton

Skeleton object corresponding to the category.

Source code in sleap_io/io/coco.py
def create_skeleton_from_category(category: Dict) -> Skeleton:
    """Build a Skeleton object from a COCO category definition.

    Args:
        category: COCO category dictionary with keypoints and skeleton.

    Returns:
        Skeleton object corresponding to the category.

    Raises:
        ValueError: If the category defines no keypoints.
    """
    if "keypoints" not in category:
        raise ValueError(f"Category '{category['name']}' has no keypoint definitions")

    # One node per keypoint name, preserving COCO ordering.
    nodes = [Node(kp_name) for kp_name in category["keypoints"]]
    n_nodes = len(nodes)

    # Translate skeleton connections (1-based in COCO) into Edge objects,
    # silently dropping malformed or out-of-range entries.
    edges = []
    for pair in category.get("skeleton", []):
        if len(pair) != 2:
            continue
        src, dst = pair[0] - 1, pair[1] - 1
        if 0 <= src < n_nodes and 0 <= dst < n_nodes:
            edges.append(Edge(nodes[src], nodes[dst]))

    return Skeleton(nodes, edges, name=category.get("name", "unknown"))

decode_keypoints(keypoints, num_keypoints, skeleton)

Decode COCO keypoint format to numpy array for Instance creation.

Parameters:

Name Type Description Default
keypoints List[float]

Flat list of [x1, y1, v1, x2, y2, v2, ...] values.

required
num_keypoints int

Number of keypoints (for validation).

required
skeleton Skeleton

Skeleton object defining the keypoint structure.

required

Returns:

Type Description
ndarray

Numpy array of shape (num_keypoints, 3) with [x, y, visibility] values.

Source code in sleap_io/io/coco.py
def decode_keypoints(
    keypoints: List[float], num_keypoints: int, skeleton: Skeleton
) -> np.ndarray:
    """Decode COCO keypoint triplets into a numpy array for Instance creation.

    Args:
        keypoints: Flat list of [x1, y1, v1, x2, y2, v2, ...] values.
        num_keypoints: Number of keypoints (for validation).
        skeleton: Skeleton object defining the keypoint structure.

    Returns:
        Numpy array of shape (num_keypoints, 3) with [x, y, visibility] values.

    Raises:
        ValueError: If the flat list length or the skeleton node count does
            not match ``num_keypoints``.
    """
    if len(keypoints) != num_keypoints * 3:
        raise ValueError(
            f"Keypoints length {len(keypoints)} doesn't match expected "
            f"{num_keypoints * 3}"
        )

    if len(skeleton.nodes) != num_keypoints:
        raise ValueError(
            f"Skeleton has {len(skeleton.nodes)} nodes but annotation has "
            f"{num_keypoints} keypoints"
        )

    rows = []
    for offset in range(0, num_keypoints * 3, 3):
        x, y, visibility = keypoints[offset : offset + 3]
        # COCO visibility: 0 = not labeled/not visible, 1 = labeled but not
        # visible, 2 = labeled and visible. Binary datasets use 0/1 instead;
        # since the two encodings can't be distinguished here, any nonzero
        # flag is treated as a visible point and 0 yields NaN coordinates.
        if visibility == 0:
            rows.append([np.nan, np.nan, False])
        else:
            rows.append([x, y, True])

    return np.array(rows, dtype=np.float32)

parse_coco_json(json_path)

Parse COCO annotation JSON file and validate structure.

Parameters:

Name Type Description Default
json_path Union[str, Path]

Path to the COCO annotation JSON file.

required

Returns:

Type Description
Dict

Parsed COCO annotation dictionary.

Raises:

Type Description
FileNotFoundError

If JSON file doesn't exist.

ValueError

If JSON structure is invalid.

Source code in sleap_io/io/coco.py
def parse_coco_json(json_path: Union[str, Path]) -> Dict:
    """Load a COCO annotation JSON file and check its structure.

    Args:
        json_path: Path to the COCO annotation JSON file.

    Returns:
        Parsed COCO annotation dictionary.

    Raises:
        FileNotFoundError: If JSON file doesn't exist.
        ValueError: If JSON structure is invalid.
    """
    json_path = Path(json_path)

    if not json_path.exists():
        raise FileNotFoundError(f"COCO annotation file not found: {json_path}")

    with open(json_path, "r") as f:
        data = json.load(f)

    # A valid COCO file must carry all three top-level sections.
    for field in ("images", "annotations", "categories"):
        if field not in data:
            raise ValueError(f"Missing required COCO field: {field}")

    # Pose data requires keypoint definitions on at least one category.
    if not any("keypoints" in cat for cat in data["categories"]):
        raise ValueError(
            "No keypoint definitions found in categories. "
            "This appears to be a detection-only COCO dataset."
        )

    return data

read_labels(json_path, dataset_root=None, grayscale=False)

Read COCO-style pose dataset and return a Labels object.

Parameters:

Name Type Description Default
json_path Union[str, Path]

Path to the COCO annotation JSON file.

required
dataset_root Optional[Union[str, Path]]

Root directory of the dataset. If None, uses parent directory of json_path.

None
grayscale bool

If True, load images as grayscale (1 channel). If False, load as RGB (3 channels). Default is False.

False

Returns:

Type Description
Labels

Parsed labels as a Labels instance.

Source code in sleap_io/io/coco.py
def read_labels(
    json_path: Union[str, Path],
    dataset_root: Optional[Union[str, Path]] = None,
    grayscale: bool = False,
) -> Labels:
    """Read COCO-style pose dataset and return a Labels object.

    Args:
        json_path: Path to the COCO annotation JSON file.
        dataset_root: Root directory of the dataset. If None, uses parent directory
                     of json_path.
        grayscale: If True, load images as grayscale (1 channel). If False, load as
                   RGB (3 channels). Default is False.

    Returns:
        Parsed labels as a Labels instance.
    """
    json_path = Path(json_path)
    dataset_root = json_path.parent if dataset_root is None else Path(dataset_root)

    # Parse and validate the COCO annotation file.
    coco_data = parse_coco_json(json_path)

    # Build one skeleton per pose category, keyed by category id.
    skeletons = {}
    for category in coco_data["categories"]:
        if "keypoints" in category:
            skeletons[category["id"]] = create_skeleton_from_category(category)

    # Track management: maps track_id -> Track object (shared across frames).
    track_dict = {}

    # Group annotations by the image they belong to.
    image_annotations = {}
    for annotation in coco_data["annotations"]:
        image_annotations.setdefault(annotation["image_id"], []).append(annotation)

    # Group images by (height, width) so each unique shape shares one Video.
    # The frame index of each image within its shape group is recorded here,
    # at grouping time: the previous implementation rebuilt this mapping with
    # a quadratic path-matching pass that also collapsed duplicate path
    # strings onto the first match.
    shape_to_images = {}
    image_id_to_path = {}
    image_id_to_shape = {}
    image_id_to_frame_idx = {}

    for image_info in coco_data["images"]:
        image_id = image_info["id"]
        height = image_info.get("height", 0)
        width = image_info.get("width", 0)

        # Resolve image path; skip images that cannot be located on disk.
        try:
            image_path = resolve_image_path(image_info["file_name"], dataset_root)
        except FileNotFoundError:
            continue

        image_id_to_path[image_id] = image_path
        shape_key = (height, width)
        image_id_to_shape[image_id] = shape_key
        paths = shape_to_images.setdefault(shape_key, [])
        # Frame index = this image's position within its shape group.
        image_id_to_frame_idx[image_id] = len(paths)
        paths.append(str(image_path))

    # Create one Video object for each unique image shape.
    shape_to_video = {}
    for shape_key, image_paths in shape_to_images.items():
        height, width = shape_key
        video = Video.from_filename(
            image_paths,
            grayscale=grayscale,
        )
        # Store shape metadata from JSON (useful when images can't be read).
        channels = 1 if grayscale else 3
        video.backend_metadata["shape"] = (len(image_paths), height, width, channels)
        shape_to_video[shape_key] = video

    # Process images and annotations into labeled frames.
    labeled_frames = []
    for image_info in coco_data["images"]:
        image_id = image_info["id"]

        # Skip if image was not found on disk.
        if image_id not in image_id_to_path:
            continue

        video = shape_to_video[image_id_to_shape[image_id]]
        frame_idx = image_id_to_frame_idx[image_id]

        # Create instances from this image's pose annotations.
        instances = []
        for annotation in image_annotations.get(image_id, []):
            category_id = annotation["category_id"]
            if category_id not in skeletons:
                continue  # Skip non-pose annotations.

            skeleton = skeletons[category_id]

            # Extract track ID from various possible sources.
            track = None
            track_id = (
                annotation.get("attributes", {}).get("object_id")
                or annotation.get("track_id")
                or annotation.get("instance_id")
            )
            if track_id is not None:
                # Create or reuse the Track object for this id.
                if track_id not in track_dict:
                    track_dict[track_id] = Track(name=f"track_{track_id}")
                track = track_dict[track_id]

            keypoints = annotation.get("keypoints", [])
            if keypoints:
                # Always use the skeleton length, not num_keypoints which may
                # count only visible points.
                points_array = decode_keypoints(
                    keypoints, len(skeleton.nodes), skeleton
                )
                instances.append(
                    Instance.from_numpy(
                        points_data=points_array, skeleton=skeleton, track=track
                    )
                )

        # Emit a frame for any annotated image, even if no instance decoded.
        if instances or image_id in image_annotations:
            labeled_frames.append(
                LabeledFrame(video=video, frame_idx=frame_idx, instances=instances)
            )

    # Create Labels object (skeletons will be auto-added from instances).
    return Labels(labeled_frames=labeled_frames)

read_labels_set(dataset_path, json_files=None, grayscale=False)

Read multiple COCO annotation files and return a dictionary of Labels.

This function is designed to handle datasets with multiple splits (train/val/test) or multiple annotation files.

Parameters:

Name Type Description Default
dataset_path Union[str, Path]

Root directory containing COCO annotation files.

required
json_files Optional[List[str]]

List of specific JSON filenames to load. If None, automatically discovers all .json files in the dataset directory.

None
grayscale bool

If True, load images as grayscale (1 channel). If False, load as RGB (3 channels). Default is False.

False

Returns:

Type Description
Dict[str, Labels]

Dictionary mapping split names to Labels objects.

Source code in sleap_io/io/coco.py
def read_labels_set(
    dataset_path: Union[str, Path],
    json_files: Optional[List[str]] = None,
    grayscale: bool = False,
) -> Dict[str, Labels]:
    """Read multiple COCO annotation files and return a dictionary of Labels.

    This function is designed to handle datasets with multiple splits
    (train/val/test) or multiple annotation files.

    Args:
        dataset_path: Root directory containing COCO annotation files.
        json_files: List of specific JSON filenames to load. If None, automatically
                   discovers all .json files in the dataset directory.
        grayscale: If True, load images as grayscale (1 channel). If False, load as
                   RGB (3 channels). Default is False.

    Returns:
        Dictionary mapping split names to Labels objects.

    Raises:
        FileNotFoundError: If auto-discovery finds no JSON files.
    """
    dataset_path = Path(dataset_path)

    if json_files is None:
        # Auto-discover annotation files in the dataset root.
        discovered = [candidate.name for candidate in dataset_path.glob("*.json")]
        if not discovered:
            raise FileNotFoundError(f"No JSON annotation files found in {dataset_path}")
        json_files = discovered

    # Each file's stem names its split (e.g. "train.json" -> "train").
    return {
        (dataset_path / json_file).stem: read_labels(
            dataset_path / json_file, dataset_root=dataset_path, grayscale=grayscale
        )
        for json_file in json_files
    }

resolve_image_path(image_filename, dataset_root)

Resolve image file path handling various directory structures.

Parameters:

Name Type Description Default
image_filename str

Image filename from COCO annotation.

required
dataset_root Path

Root directory of the dataset.

required

Returns:

Type Description
Path

Resolved absolute path to the image file.

Raises:

Type Description
FileNotFoundError

If image file cannot be found.

Source code in sleap_io/io/coco.py
def resolve_image_path(image_filename: str, dataset_root: Path) -> Path:
    """Resolve image file path handling various directory structures.

    Args:
        image_filename: Image filename from COCO annotation.
        dataset_root: Root directory of the dataset.

    Returns:
        Resolved absolute path to the image file.

    Raises:
        FileNotFoundError: If image file cannot be found.
    """
    # Most datasets store files exactly where the annotation says.
    direct = dataset_root / image_filename
    if direct.exists():
        return direct

    # Fall back to conventional image subdirectories.
    for prefix in ("images", "imgs", "data/images"):
        candidate = dataset_root / prefix / image_filename
        if candidate.exists():
            return candidate

    # Last resort: search the whole tree for a file with the same basename.
    basename = Path(image_filename).name
    for found in dataset_root.rglob(basename):
        if found.is_file():
            return found

    raise FileNotFoundError(
        f"Image file not found: {image_filename} (searched in {dataset_root})"
    )