import os
import random
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import List, Dict, Union, Any, Optional, Tuple

import cv2
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset
from tqdm import tqdm

from super_gradients.common.abstractions.abstract_logger import get_logger
from super_gradients.training.exceptions.dataset_exceptions import EmptyDatasetException
from super_gradients.training.transforms.transforms import DetectionTransform, DetectionTargetsFormatTransform
from super_gradients.training.utils.detection_utils import get_cls_posx_in_target, DetectionTargetsFormat

logger = get_logger(__name__)

class DetectionDataset(Dataset):
    """Detection dataset.

    This is a boilerplate class to facilitate the implementation of datasets.

    HOW TO CREATE A DATASET THAT INHERITS FROM DetectionDataset?
        - Inherit from DetectionDataset.
        - Implement the method self._load_annotation to return at least the fields "target" and "img_path".
        - Call super().__init__ with the required params.
            /!\ super().__init__ will call self._load_annotation, so make sure that every required
            attribute is set up before calling super().__init__ (ideally just call it last).

    WORKFLOW:
        - On instantiation:
            - All annotations are cached. If class_inclusion_list was specified, the annotations are also subclassed at this step.
            - If cache is True, the images are also cached.
        - On call (__getitem__) for a specific image index:
            - The image and annotations are grouped together in a dict called SAMPLE.
            - The sample is processed according to the transforms.
            - Only the specified fields are returned by __getitem__.

    TERMINOLOGY:
        - TARGET:     Ground truth, made of bboxes. The format can vary from one dataset to another.
        - ANNOTATION: Combination of targets (ground truth) and metadata of the image, but without the image itself.
            > Has to include the fields "target" and "img_path".
            > Can include other fields like "crowd_target", "image_info", "segmentation", ...
        - SAMPLE:     Output of the dataset:
            > Has to include the fields "target" and "image".
            > Can include other fields like "crowd_target", "image_info", "segmentation", ...
        - INDEX:      Refers to the index in the dataset.
        - SAMPLE ID:  Refers to the id of the sample before dropping any annotation.
            Let's imagine a situation where the downloaded data is made of 120 images, but 20 were dropped
            because they had no annotation. In that case:
                > We have 120 samples, so sample_id will be between 0 and 119.
                > But only 100 will be indexed, so index will be between 0 and 99.
                > Therefore, we also have len(self) = 100.
    """

    def __init__(
        self,
        data_dir: str,
        input_dim: tuple,
        original_target_format: DetectionTargetsFormat,
        max_num_samples: int = None,
        cache: bool = False,
        cache_path: str = None,
        transforms: Optional[List[DetectionTransform]] = None,
        all_classes_list: Optional[List[str]] = None,
        class_inclusion_list: Optional[List[str]] = None,
        ignore_empty_annotations: bool = True,
        target_fields: List[str] = None,
        output_fields: List[str] = None,
    ):
        """Detection dataset.

        :param data_dir: Where the data is stored.
        :param input_dim: Image size (when loaded, before transforms).
        :param original_target_format: Format of the targets stored on disk (i.e. raw data format); the output format
            might differ based on the transforms.
        :param max_num_samples: If not None, set the maximum size of the dataset by only indexing the first n annotations/images.
        :param cache: Whether to cache images or not.
        :param cache_path: Path to the directory where cached images will be stored in an optimized format.
        :param transforms: List of transforms to apply sequentially on each sample (default: no transform).
        :param all_classes_list: All the class names.
        :param class_inclusion_list: If not None, every class not included will be ignored.
        :param ignore_empty_annotations: If True and class_inclusion_list is not None, images without any target
            will be ignored.
        :param target_fields: List of target fields to subclass. It has to include at least "target",
            but can also include "crowd_target", "segmentation_target", ...
        :param output_fields: Fields that will be output by __getitem__.
            It has to include at least "image" and "target", but can include others.
        """
        super().__init__()

        self.data_dir = data_dir
        if not Path(data_dir).exists():
            raise FileNotFoundError(f"Please make sure to download the data in the data directory ({self.data_dir}).")

        # Number of images that are available (regardless of ignored images)
        self.n_available_samples = self._setup_data_source()
        if not isinstance(self.n_available_samples, int) or self.n_available_samples < 1:
            raise ValueError(f"_setup_data_source() should return the number of available samples but got {self.n_available_samples}")

        self.input_dim = input_dim
        self.original_target_format = original_target_format
        self.max_num_samples = max_num_samples

        self.all_classes_list = all_classes_list
        self.class_inclusion_list = class_inclusion_list
        self.classes = self.class_inclusion_list or self.all_classes_list
        wrong_classes = set(self.classes) - set(all_classes_list)
        if wrong_classes:
            raise ValueError(f"class_inclusion_list includes classes that are not in all_classes_list: {wrong_classes}")

        self.ignore_empty_annotations = ignore_empty_annotations
        self.target_fields = target_fields or ["target"]
        if "target" not in self.target_fields:
            raise KeyError('"target" is expected to be in the fields to subclass but it was not included')

        self.annotations = self._cache_annotations()

        self.cache = cache
        self.cache_path = cache_path
        self.cached_imgs = self._cache_images() if self.cache else None

        self.transforms = transforms or []

        self.output_fields = output_fields or ["image", "target"]
        if len(self.output_fields) < 2 or self.output_fields[0] != "image" or self.output_fields[1] != "target":
            raise ValueError('output_fields must start with "image" and then "target", followed by any other field')

    def _setup_data_source(self) -> int:
        """Set up the data source and store relevant objects as attributes.

        :return: Number of available samples (i.e. how many images we have, regardless of any filter we might want to use)
        """
        raise NotImplementedError

    def _load_annotation(self, sample_id: int) -> Dict[str, Union[np.ndarray, Any]]:
        """Load the annotations associated with a specific sample.
        Please note that the targets should be resized according to self.input_dim!

        :param sample_id: Id of the sample to load annotations from.
        :return: Annotation, a dict that can have any field but has to include at least "target" and "img_path".
        """
        raise NotImplementedError

    def _cache_annotations(self) -> List[Dict[str, Union[np.ndarray, Any]]]:
        """Load all the annotations to memory to avoid opening files back and forth.

        :return: List of annotations
        """
        annotations = []
        for sample_id in tqdm(range(self.n_available_samples), desc="Caching annotations"):
            if self.max_num_samples is not None and len(annotations) >= self.max_num_samples:
                break

            img_annotation = self._load_annotation(sample_id)
            if "target" not in img_annotation or "img_path" not in img_annotation:
                raise KeyError('_load_annotation is expected to return at least the fields "target" and "img_path"')

            if self.class_inclusion_list is not None:
                img_annotation = self._sub_class_annotation(img_annotation)

            is_annotation_empty = all(len(img_annotation[field]) == 0 for field in self.target_fields)
            if self.ignore_empty_annotations and is_annotation_empty:
                continue

            annotations.append(img_annotation)

        if len(annotations) == 0:
            raise EmptyDatasetException(f"Out of {self.n_available_samples} images, not a single one was found with "
                                        f"any of these classes: {self.class_inclusion_list}")
        return annotations

    def _sub_class_annotation(self, annotation: dict) -> dict:
        """Subclass every field listed in self.target_fields. It could be targets, crowd_targets, ...

        :param annotation: Dict representing the annotation of a specific image
        :return: Subclassed annotation
        """
        cls_posx = get_cls_posx_in_target(self.original_target_format)
        for field in self.target_fields:
            annotation[field] = self._sub_class_target(targets=annotation[field], cls_posx=cls_posx)
        return annotation

    def _sub_class_target(self, targets: np.ndarray, cls_posx: int) -> np.ndarray:
        """Subclass the targets of a specific image.

        :param targets: Target array to subclass of shape [n_targets, 5], 5 representing a bbox
        :param cls_posx: Position of the class id in a bbox
            ex: 0 if bbox of format label_xyxy | -1 if bbox of format xyxy_label
        :return: Subclassed target
        """
        targets_kept = []
        for target in targets:
            cls_id = int(target[cls_posx])
            cls_name = self.all_classes_list[cls_id]
            if cls_name in self.class_inclusion_list:
                # Replace the cls_id relative to self.all_classes_list by the cls_id relative to self.class_inclusion_list
                target[cls_posx] = self.class_inclusion_list.index(cls_name)
                targets_kept.append(target)
        return np.array(targets_kept) if len(targets_kept) > 0 else np.zeros((0, 5), dtype=np.float32)
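
    # A worked example of the remapping above (values are illustrative only):
    #
    #     >>> # assuming all_classes_list=["cat", "dog", "bird"], class_inclusion_list=["dog"],
    #     >>> # and an xyxy_label format (cls_posx=-1):
    #     >>> dataset._sub_class_target(np.array([[10., 20., 50., 60., 1.]]), cls_posx=-1)
    #     array([[10., 20., 50., 60., 0.]])  # "dog": id 1 -> 0; "cat"/"bird" boxes would be dropped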

    def _cache_images(self) -> np.ndarray:
        """Cache the images. The cached images are stored in a file to be loaded faster next time.

        :return: Cached images
        """
        if self.cache_path is None:
            raise ValueError("You must specify a cache_path if you want to cache your images. "
                             "If you did not mean to use cache, please set cache=False.")
        cache_path = Path(self.cache_path)
        cache_path.mkdir(parents=True, exist_ok=True)

        logger.warning("\n********************************************************************************\n"
                       "You are using cached images in RAM to accelerate training.\n"
                       "This requires large system RAM.\n"
                       "********************************************************************************\n")
        max_h, max_w = self.input_dim[0], self.input_dim[1]
        img_resized_cache_path = cache_path / "img_resized_cache.array"

        if not img_resized_cache_path.exists():
            logger.info("Caching images for the first time.")
            NUM_THREADS = min(8, os.cpu_count())
            # Load images already resized to fit within (max_h, max_w), so they fit the placeholder below
            loaded_images = ThreadPool(NUM_THREADS).imap(func=lambda x: self._load_resized_img(x), iterable=range(len(self)))

            # Initialize placeholder for images
            cached_imgs = np.memmap(str(img_resized_cache_path), shape=(len(self), max_h, max_w, 3),
                                    dtype=np.uint8, mode="w+")

            # Store images in the placeholder
            loaded_images_pbar = tqdm(enumerate(loaded_images), total=len(self))
            for i, image in loaded_images_pbar:
                cached_imgs[i][: image.shape[0], : image.shape[1], :] = image.copy()
            cached_imgs.flush()
            loaded_images_pbar.close()
        else:
            logger.warning("You are using cached imgs! Make sure your dataset is not changed!!\n"
                           "Every time self.input_dim is changed in your exp file, you need to delete\n"
                           "the cached data and re-generate them.\n")

        logger.info("Loading cached imgs...")
        cached_imgs = np.memmap(str(img_resized_cache_path), shape=(len(self), max_h, max_w, 3),
                                dtype=np.uint8, mode="r+")
        return cached_imgs

    def _load_resized_img(self, index: int) -> np.ndarray:
        """Load an image and resize it to fit self.input_dim, preserving the aspect ratio.

        :param index: Image index
        :return: Resized image
        """
        img = self._load_image(index)

        # Scale by the ratio that makes the larger relative side fit input_dim
        r = min(self.input_dim[0] / img.shape[0], self.input_dim[1] / img.shape[1])
        desired_size = (int(img.shape[1] * r), int(img.shape[0] * r))

        resized_img = cv2.resize(src=img, dsize=desired_size, interpolation=cv2.INTER_LINEAR).astype(np.uint8)
        return resized_img
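
    # Worked example: with input_dim = (640, 640) and a 500x1000 (HxW) image,
    # r = min(640 / 500, 640 / 1000) = 0.64, so the image is resized to 320x640 (HxW)
    # and fits inside the (640, 640) cache placeholder. Note that cv2.resize takes
    # dsize as (width, height), hence desired_size = (640, 320).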

    def _load_image(self, index: int) -> np.ndarray:
        """Load the image at a specific index with its original resolution.

        :param index: Image index
        :return: Image in array format
        """
        img_path = self.annotations[index]["img_path"]
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"{img_path} was not found. Please make sure that the dataset was "
                                    f"downloaded and that the path is correct.")
        return img

    def __del__(self):
        """Clear the cached images."""
        if hasattr(self, "cached_imgs"):
            del self.cached_imgs

    def __len__(self):
        """Get the length of the dataset."""
        return len(self.annotations)

    def __getitem__(self, index: int) -> Tuple:
        """Get the sample post transforms at a specific index of the dataset.
        The output of this function will be collated to form batches.
        """
        sample = self.apply_transforms(self.get_sample(index))
        for field in self.output_fields:
            if field not in sample.keys():
                raise KeyError(f"The field {field} must be present in the sample but was not found. "
                               "Please check the output fields of your transforms.")
        return tuple(sample[field] for field in self.output_fields)

    def get_random_item(self):
        return self[self._random_index()]

    def get_sample(self, index: int) -> Dict[str, Union[np.ndarray, Any]]:
        """Get the raw sample, before any transform (besides subclassing).

        :param index: Image index
        :return: Sample, i.e. a dictionary including at least "image" and "target"
        """
        img = self.get_resized_image(index)
        annotation = self.annotations[index]
        return {"image": img, **annotation}

    def get_resized_image(self, index: int) -> np.ndarray:
        """Get the resized image at a specific index, either from cache or by loading from disk, based on self.cache.

        :param index: Image index
        :return: Resized image
        """
        if self.cache:
            return self.cached_imgs[index].copy()
        else:
            return self._load_resized_img(index)

    def apply_transforms(self, sample: Dict[str, Union[np.ndarray, Any]]) -> Dict[str, Union[np.ndarray, Any]]:
        """Apply self.transforms sequentially to a sample.

        If a transform has the attribute 'additional_samples_count', additional samples will be loaded and stored in
        sample["additional_samples"] prior to applying it. Combining this with the attribute "non_empty_annotations"
        will load only additional samples with objects in them.

        :param sample: Sample to apply the transforms on (loaded with self.get_sample)
        :return: Transformed sample
        """
        for transform in self.transforms:
            self._add_additional_inputs_for_transform(sample, transform)
            sample = transform(sample)
            sample.pop("additional_samples")  # additional_samples is not useful after the transform
        return sample
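
    # A minimal sketch of a transform using this mechanism. The class below is
    # hypothetical (not part of super_gradients): a mosaic-style transform that
    # declares how many extra samples it needs, and that they must contain boxes.
    #
    #     class MosaicLikeTransform(DetectionTransform):
    #         additional_samples_count = 3  # a 2x2 mosaic needs 3 extra images
    #         non_empty_annotations = True  # only mix in images that have targets
    #
    #         def __call__(self, sample):
    #             extra = sample["additional_samples"]  # list of 3 samples, loaded by the dataset
    #             ...  # stitch sample["image"] with the extra images and merge their targets
    #             return sample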

    def _add_additional_inputs_for_transform(self, sample: Dict[str, Union[np.ndarray, Any]],
                                             transform: DetectionTransform):
        """Add the additional inputs required by a transform to the sample."""
        additional_samples_count = getattr(transform, "additional_samples_count", 0)
        non_empty_annotations = getattr(transform, "non_empty_annotations", False)
        sample["additional_samples"] = self.get_random_samples(additional_samples_count, non_empty_annotations)

    def get_random_samples(self, count: int,
                           non_empty_annotations_only: bool = False) -> List[Dict[str, Union[np.ndarray, Any]]]:
        """Load random samples.

        :param count: The number of samples wanted
        :param non_empty_annotations_only: If True, only return samples with at least 1 annotation
        :return: A list of samples satisfying the input params
        """
        return [self.get_random_sample(non_empty_annotations_only) for _ in range(count)]

    def get_random_sample(self, non_empty_annotations_only: bool = False):
        if non_empty_annotations_only:
            return self.get_sample(self._get_random_non_empty_annotation_index())
        else:
            return self.get_sample(self._random_index())

    def _get_random_non_empty_annotation_index(self) -> int:
        """Get the index of a random non-empty annotation.

        :return: Image index
        """
        target, index = [], -1
        while len(target) == 0:
            index = self._random_index()
            target = self.annotations[index]["target"]
        return index

    def _random_index(self):
        """Get a random index of this dataset."""
        return random.randint(0, len(self) - 1)

    @property
    def output_target_format(self):
        """Format of the targets after self.transforms have been applied: the last
        DetectionTargetsFormatTransform in the pipeline determines the output format,
        falling back to self.original_target_format if there is none."""
        target_format = self.original_target_format
        for transform in self.transforms:
            if isinstance(transform, DetectionTargetsFormatTransform):
                target_format = transform.output_format
        return target_format
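
    # For instance, with transforms ending in
    # DetectionTargetsFormatTransform(output_format=DetectionTargetsFormat.LABEL_CXCYWH),
    # output_target_format is LABEL_CXCYWH, which is also the format __getitem__ yields.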

    def plot(self, max_samples_per_plot: int = 16, n_plots: int = 1, plot_transformed_data: bool = True):
        """Combine samples of images with bboxes into plots and display the result.

        :param max_samples_per_plot: Maximum number of images to be displayed per plot
        :param n_plots: Number of plots to display (each plot being a combination of images with bboxes)
        :param plot_transformed_data: If True, the plot will be over samples after applying transforms (i.e. on __getitem__).
            If False, the plot will be over the raw samples (i.e. on get_sample)
        """
        input_format = self.output_target_format if plot_transformed_data else self.original_target_format
        target_format_transform = DetectionTargetsFormatTransform(input_format=input_format,
                                                                  output_format=DetectionTargetsFormat.XYXY_LABEL)

        for plot_i in range(n_plots):
            fig = plt.figure(figsize=(10, 10))
            n_subplot = int(np.ceil(max_samples_per_plot ** 0.5))
            for img_i in range(max_samples_per_plot):
                index = img_i + plot_i * max_samples_per_plot
                if plot_transformed_data:
                    image, targets, *_ = self[index]
                    image = image.transpose(1, 2, 0).astype(np.int32)
                else:
                    sample = self.get_sample(index)
                    image, targets = sample["image"], sample["target"]

                sample = target_format_transform({"image": image, "target": targets})

                # shape = [padding_size x 4] (The dataset will most likely pad the targets to a fixed dim)
                boxes = sample["target"][:, 0:4]
                # shape = [n_box x 4] (We remove padded boxes, which correspond to rows with only 0)
                boxes = boxes[(boxes != 0).any(axis=1)]
                plt.subplot(n_subplot, n_subplot, img_i + 1).imshow(image)
                plt.plot(boxes[:, [0, 2, 2, 0, 0]].T, boxes[:, [1, 1, 3, 3, 1]].T, '.-')
                plt.axis('off')

            fig.tight_layout()
            plt.show()
            plt.close()