import random
from abc import abstractmethod
from typing import Tuple, List, Iterable, Union

import cv2
import numpy as np
from torch import Tensor
from torchvision.transforms import functional as F

__all__ = [
    "KeypointsImageNormalize",
    "KeypointsImageToTensor",
    "KeypointsPadIfNeeded",
    "KeypointsLongestMaxSize",
    "KeypointTransform",
    "KeypointsCompose",
    "KeypointsRandomHorizontalFlip",
    "KeypointsRandomAffineTransform",
    "KeypointsRandomVerticalFlip",
]

class KeypointTransform(object):
    """
    Base class for all keypoint augmentation transforms.
    Subclasses should implement __call__, which takes an image, mask and keypoints as input and
    returns the transformed image, mask and keypoints.
    """

    @abstractmethod
    def __call__(self, image: np.ndarray, mask: np.ndarray, joints: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Apply the transformation to image, mask and keypoints.

        :param image: Input image of [H,W,3] shape
        :param mask: Numpy array of [H,W] shape, where zero values are treated as an ignore mask (not contributing to the loss)
        :param joints: Numpy array of [NumInstances, NumJoints, 3] shape. The last dimension contains (x, y, visibility) for each joint.
        :return: (image, mask, joints)
        """
        raise NotImplementedError

class KeypointsCompose(KeypointTransform):
    def __init__(self, transforms: List[KeypointTransform]):
        self.transforms = transforms

    def __call__(self, image: np.ndarray, mask: np.ndarray, joints: np.ndarray) -> Tuple[Union[np.ndarray, Tensor], np.ndarray, np.ndarray]:
        for t in self.transforms:
            image, mask, joints = t(image, mask, joints)
        return image, mask, joints
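
# A minimal composition sketch (illustrative; the transforms named below are defined later in this
# module). Transforms are applied in list order, so array-based geometric transforms should come
# before KeypointsImageToTensor / KeypointsImageNormalize, which operate on the tensor image:
#
#   transforms = KeypointsCompose([KeypointsRandomHorizontalFlip(flip_index, prob=0.5), KeypointsImageToTensor()])
#   image, mask, joints = transforms(image, mask, joints)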

class KeypointsImageToTensor(KeypointTransform):
    """
    Convert image from numpy array to a torch Tensor and permute axes to [C,H,W].
    For uint8 inputs the image is also divided by 255.0 to bring it to the [0,1] range.
    """

    def __call__(self, image: np.ndarray, mask: np.ndarray, joints: np.ndarray):
        return F.to_tensor(image), mask, joints

class KeypointsImageNormalize(KeypointTransform):
    """
    Normalize image with mean and std. Note that this transform should come after KeypointsImageToTensor,
    since it operates on a torch Tensor and not a numpy array.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, image: Tensor, mask: np.ndarray, joints: np.ndarray):
        image = F.normalize(image, mean=self.mean, std=self.std)
        return image, mask, joints

class KeypointsRandomHorizontalFlip(KeypointTransform):
    """
    Flip image, mask and joints horizontally with a given probability.
    """

    def __init__(self, flip_index: List[int], prob: float = 0.5):
        """
        :param flip_index: Indexes of keypoints on the flipped image. When doing a left-right flip, the left hand becomes
                           the right hand, so this array contains the order of keypoints on the flipped image.
                           It is dataset-specific and depends on how the keypoints are defined in the dataset.
        :param prob: Probability of flipping
        """
        self.flip_index = flip_index
        self.prob = prob

    def __call__(self, image, mask, joints):
        if image.shape[:2] != mask.shape[:2]:
            raise RuntimeError(f"Image shape ({image.shape[:2]}) does not match mask shape ({mask.shape[:2]}).")

        if random.random() < self.prob:
            image = np.ascontiguousarray(np.fliplr(image))
            mask = np.ascontiguousarray(np.fliplr(mask))

            rows, cols = image.shape[:2]
            joints = joints.copy()
            joints = joints[:, self.flip_index]
            joints[:, :, 0] = cols - joints[:, :, 0] - 1

        return image, mask, joints
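
# Example flip_index (illustrative assumption, not mandated by this module): for the 17-keypoint
# COCO ordering (nose, left/right eye, left/right ear, shoulders, elbows, wrists, hips, knees, ankles)
# each left/right pair swaps while the nose keeps its index:
#
#   flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]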

class KeypointsRandomVerticalFlip(KeypointTransform):
    """
    Flip image, mask and joints vertically with a given probability.
    """

    def __init__(self, prob: float = 0.5):
        self.prob = prob

    def __call__(self, image, mask, joints):
        if image.shape[:2] != mask.shape[:2]:
            raise RuntimeError(f"Image shape ({image.shape[:2]}) does not match mask shape ({mask.shape[:2]}).")

        if random.random() < self.prob:
            image = np.ascontiguousarray(np.flipud(image))
            mask = np.ascontiguousarray(np.flipud(mask))

            rows, cols = image.shape[:2]
            joints = joints.copy()
            joints[:, :, 1] = rows - joints[:, :, 1] - 1

        return image, mask, joints

class KeypointsLongestMaxSize(KeypointTransform):
    """
    Resize image, mask and joints so that the resulting image does not exceed (max_height, max_width).
    """

    def __init__(self, max_height: int, max_width: int, interpolation: int = cv2.INTER_LINEAR, prob: float = 1.0):
        """
        :param max_height: Maximum image height after resizing
        :param max_width: Maximum image width after resizing
        :param interpolation: Interpolation method used for the image
        :param prob: Probability of applying this transform
        """
        self.max_height = max_height
        self.max_width = max_width
        self.interpolation = interpolation
        self.prob = prob

    def __call__(self, image, mask, joints: np.ndarray):
        if random.random() < self.prob:
            height, width = image.shape[:2]
            scale = min(self.max_height / height, self.max_width / width)
            image = self.rescale_image(image, scale, self.interpolation)

            # Sanity checks: the longest side must reach its maximum, and neither side may exceed it
            if image.shape[0] != self.max_height and image.shape[1] != self.max_width:
                raise RuntimeError(f"Image shape is not as expected (scale={scale}, input_shape={height, width}, resized_shape={image.shape[:2]})")
            if image.shape[0] > self.max_height or image.shape[1] > self.max_width:
                raise RuntimeError(f"Image shape is not as expected (scale={scale}, input_shape={height, width}, resized_shape={image.shape[:2]})")

            mask = self.rescale_image(mask, scale, cv2.INTER_LINEAR)

            joints = joints.copy()
            joints[:, :, 0:2] = joints[:, :, 0:2] * scale

        return image, mask, joints

    @classmethod
    def rescale_image(cls, img, scale, interpolation):
        height, width = img.shape[:2]
        if scale != 1.0:
            new_height, new_width = tuple(int(dim * scale + 0.5) for dim in (height, width))
            img = cv2.resize(img, dsize=(new_width, new_height), interpolation=interpolation)
        return img

class KeypointsPadIfNeeded(KeypointTransform):
    """
    Pad image and mask so that the resulting image size is not smaller than (min_height, min_width).
    The image and mask are padded on the right and bottom, so the joints remain unchanged.
    """

    def __init__(self, min_height: int, min_width: int, image_pad_value: int, mask_pad_value: float):
        """
        :param min_height: Minimum image height after padding
        :param min_width: Minimum image width after padding
        :param image_pad_value: Padding value for the image
        :param mask_pad_value: Padding value for the mask
        """
        self.min_height = min_height
        self.min_width = min_width
        self.image_pad_value = tuple(image_pad_value) if isinstance(image_pad_value, Iterable) else int(image_pad_value)
        self.mask_pad_value = mask_pad_value

    def __call__(self, image, mask, joints):
        height, width = image.shape[:2]

        pad_bottom = max(0, self.min_height - height)
        pad_right = max(0, self.min_width - width)

        image = cv2.copyMakeBorder(image, top=0, bottom=pad_bottom, left=0, right=pad_right, value=self.image_pad_value, borderType=cv2.BORDER_CONSTANT)

        original_dtype = mask.dtype
        mask = cv2.copyMakeBorder(
            mask.astype(np.uint8), top=0, bottom=pad_bottom, left=0, right=pad_right, value=self.mask_pad_value, borderType=cv2.BORDER_CONSTANT
        )
        mask = mask.astype(original_dtype)

        return image, mask, joints
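
# Usage note (illustrative): KeypointsLongestMaxSize followed by KeypointsPadIfNeeded with matching
# sizes yields a fixed-size input while preserving aspect ratio. Because padding is added only on the
# right and bottom, joint coordinates produced by the resize step need no further adjustment:
#
#   image, mask, joints = KeypointsLongestMaxSize(512, 512)(image, mask, joints)
#   image, mask, joints = KeypointsPadIfNeeded(512, 512, image_pad_value=127, mask_pad_value=1)(image, mask, joints)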

class KeypointsRandomAffineTransform(KeypointTransform):
    """
    Apply a random affine transform (rotation, scale, translation) to image, mask and joints.
    """

    def __init__(
        self,
        max_rotation: float,
        min_scale: float,
        max_scale: float,
        max_translate: float,
        image_pad_value: int,
        mask_pad_value: float,
        prob: float = 0.5,
    ):
        """
        :param max_rotation: Max rotation angle in degrees
        :param min_scale: Lower bound for the scale change. For +/- 20% size jitter this should be 0.8
        :param max_scale: Upper bound for the scale change. For +/- 20% size jitter this should be 1.2
        :param max_translate: Max translation offset as a fraction of the image size
        :param image_pad_value: Padding value for the image
        :param mask_pad_value: Padding value for the mask
        :param prob: Probability of applying this transform
        """
        self.max_rotation = max_rotation
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.max_translate = max_translate
        self.image_pad_value = tuple(image_pad_value) if isinstance(image_pad_value, Iterable) else int(image_pad_value)
        self.mask_pad_value = mask_pad_value
        self.prob = prob

    def _get_affine_matrix(self, img, angle, scale, dx, dy):
        """
        Build a 2x3 affine matrix that rotates and scales around the (translated) image center.

        :param img: Image the matrix is computed for (only its shape is used)
        :param angle: Rotation angle in degrees
        :param scale: Isotropic scale factor
        :param dx: Horizontal translation as a fraction of the image width
        :param dy: Vertical translation as a fraction of the image height
        :return: 2x3 affine transformation matrix
        """
        height, width = img.shape[:2]
        center = (width / 2 + dx * width, height / 2 + dy * height)
        matrix = cv2.getRotationMatrix2D(center, angle, scale)
        return matrix

    def apply_to_keypoints(self, joints: np.ndarray, mat: np.ndarray):
        # Convert (x, y) to homogeneous coordinates (x, y, 1) and apply the 2x3 affine matrix
        shape = joints.shape
        joints = joints.reshape(-1, 2)
        return np.dot(np.concatenate((joints, joints[:, 0:1] * 0 + 1), axis=1), mat.T).reshape(shape)

    def apply_to_image(self, image, mat, interpolation, padding_value, padding_mode=cv2.BORDER_CONSTANT):
        return cv2.warpAffine(
            image,
            mat,
            dsize=(image.shape[1], image.shape[0]),
            flags=interpolation,
            borderValue=padding_value,
            borderMode=padding_mode,
        )

    def __call__(self, image: np.ndarray, mask: np.ndarray, joints: np.ndarray):
        """
        :param image: Image of [H,W,3] shape
        :param mask: Mask of [H,W] shape
        :param joints: Joints of [NumInstances, NumJoints, 3] shape. The last dimension contains (x, y, visibility) for each joint.
        :return: (image, mask, joints)
        """
        if random.random() < self.prob:
            angle = random.uniform(-self.max_rotation, self.max_rotation)
            scale = random.uniform(self.min_scale, self.max_scale)
            dx = random.uniform(-self.max_translate, self.max_translate)
            dy = random.uniform(-self.max_translate, self.max_translate)

            mat_output = self._get_affine_matrix(image, angle, scale, dx, dy)
            mat_output = mat_output[:2]

            mask = self.apply_to_image(mask, mat_output, cv2.INTER_NEAREST, self.mask_pad_value, cv2.BORDER_CONSTANT)
            image = self.apply_to_image(image, mat_output, cv2.INTER_LINEAR, self.image_pad_value, cv2.BORDER_CONSTANT)

            joints = joints.copy()
            joints[:, :, 0:2] = self.apply_to_keypoints(joints[:, :, 0:2], mat_output)

            # Update visibility status of joints that were moved outside the visible area
            joints_outside_image = (joints[:, :, 0] < 0) | (joints[:, :, 0] >= image.shape[1]) | (joints[:, :, 1] < 0) | (joints[:, :, 1] >= image.shape[0])
            joints[joints_outside_image, 2] = 0

        return image, mask, joints
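

if __name__ == "__main__":
    # Minimal smoke test on synthetic data (illustrative only; the parameter values and the
    # COCO-style flip_index are arbitrary example choices, not prescribed by this module).
    flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
    pipeline = KeypointsCompose(
        [
            KeypointsRandomHorizontalFlip(flip_index=flip_index, prob=0.5),
            KeypointsRandomAffineTransform(
                max_rotation=30, min_scale=0.8, max_scale=1.2, max_translate=0.1, image_pad_value=127, mask_pad_value=1, prob=0.5
            ),
            KeypointsLongestMaxSize(max_height=512, max_width=512),
            KeypointsPadIfNeeded(min_height=512, min_width=512, image_pad_value=127, mask_pad_value=1),
            KeypointsImageToTensor(),
            KeypointsImageNormalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ]
    )

    image = np.random.randint(0, 255, size=(480, 640, 3), dtype=np.uint8)
    mask = np.ones((480, 640), dtype=np.float32)
    joints = np.random.rand(2, 17, 3).astype(np.float32) * np.array([640, 480, 1], dtype=np.float32)

    image, mask, joints = pipeline(image, mask, joints)
    print(image.shape, mask.shape, joints.shape)  # e.g. torch.Size([3, 512, 512]) (512, 512) (2, 17, 3)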