@@ -1,17 +1,16 @@
 
                             # SSD MobileNetV2 Detection training on CoCo2017 Dataset:
                
 
                            -# Trained in 320x320 mAP@0.5@0.95 (COCO API, confidence 0.001, IoU threshold 0.6, test on 320x320 images) ~21.5
                
 
                            -# Checkpoint path: https://deci-pretrained-models.s3.amazonaws.com/ssd_lite_mobilenet_v2/ckpt_best_anchors.pth
                
 
                            +# Trained in 320x320 mAP@0.5:0.95 (COCO API, confidence 0.001, IoU threshold 0.6, test on 320x320 images) ~20.52
                
 
                            +# Checkpoint path: https://deci-pretrained-models.s3.amazonaws.com/ssd_lite_mobilenet_v2_coco_res320_new_coco_filtered_affine_scale_5_15_no_mosaic/ckpt_best.pth
                
 
                             # (trained with stride_16_plus_big)
                
 
                            -# Hardware: 4 NVIDIA RTX A5000
                
 
                            -# Training time: ±16 hours
                
 
                            +# Hardware: 8 NVIDIA RTX 3090
                
 
                            +# Training time: ~17 hours
                
 
                             # Instructions:
                
 
                             # Set the PYTHONPATH environment variable: (Replace "YOUR_LOCAL_PATH" with the path to the downloaded repo):
                
 
                            -#   export PYTHONPATH="YOUR_LOCAL_PATH"/super_gradients/
                
 
                            -
                
 
                            +# export PYTHONPATH="YOUR_LOCAL_PATH"/super_gradients/src:"YOUR_LOCAL_PATH"/super_gradients/
                
 
                            +#
                
 
                             # Run with:
                
 
                            -# python3 -m torch.distributed.launch --nproc_per_node=4 train_from_recipe_example/train_from_recipe.py \
                
 
                            -# --config-name=coco_ssd_lite_mobilenet_v2.yaml
                
 
                            +# python3 -m torch.distributed.launch --nproc_per_node=8 train_from_recipe.py --config-name=coco2017_ssd_lite_mobilenet_v2.yaml
                
 
                             # NOTE:
                
@@ -23,20 +22,13 @@
 
                             defaults:
                
 
                            -  - training_hyperparams: default_train_params
                
 
                            -  - dataset_params: coco_detection_dataset_params
                
 
                            +  - training_hyperparams: coco2017_ssd_lite_mobilenet_v2_train_params
                
 
                            +  - dataset_params: coco_detection_ssd_lite_mobilenet_v2_dataset_params
                
 
                               - arch_params: default_arch_params
                
 
                               - checkpoint_params: default_checkpoint_params
                
 
                               - anchors: ssd_anchors
                
 
                             architecture: ssd_lite_mobilenet_v2
                
 
                            -project_name: SSD_Mobile
                
 
                            -
                
 
                            -dataset_params:
                
 
                            -  batch_size: 64
                
 
                            -  val_batch_size: 64
                
 
                            -  val_image_size: 320
                
 
                            -  train_image_size: 320
                
 
                             data_loader_num_workers: 8
                
 
                             model_checkpoints_location: local
                
@@ -47,7 +39,7 @@ sg_model:
 
                               _target_: super_gradients.SgModel
                
 
                               experiment_name: ${experiment_name}
                
 
                               model_checkpoints_location: ${model_checkpoints_location}
                
 
                            -  multi_gpu: AUTO
                
 
                            +  multi_gpu: DDP
                
 
                             anchors_resolution: ${dataset_params.val_image_size}x${dataset_params.val_image_size}
                
 
                             anchors_name: stride_16_plus_big
                
@@ -58,40 +50,12 @@ arch_params:
 
                               anchors: ${dboxes}
                
 
                             dataset_interface:
                
 
                            -  coco2017_detection:
                
 
                            +  coco2017_detection_v2:
                
 
                                 dataset_params: ${dataset_params}
                
 
                             training_hyperparams:
                
 
                            -  ema: True
                
 
                            -  anchors_name: ${anchors_name}
                
 
                            -  max_epochs: 400
                
 
                            -  lr_mode: cosine
                
 
                            -  cosine_final_lr_ratio: 0.01
                
 
                            -  batch_accumulate: 1
                
 
                            -  initial_lr: 0.02
                
 
                            -  loss: ssd_loss
                
 
                            -  loss_logging_items_names: [ smooth_l1, closs, Loss ]
                
 
                               criterion_params:
                
 
                                 alpha: 1.0
                
 
                                 dboxes: ${dboxes}
                
 
                            -  optimizer: SGD
                
 
                            -  optimizer_params:
                
 
                            -    momentum: 0.9
                
 
                            -    weight_decay: 0.0005
                
 
                            -    nesterov: True
                
 
                            -  lr_warmup_epochs: 3
                
 
                            -  warmup_momentum: 0.8
                
 
                            -  warmup_initial_lr: 1e-06
                
 
                            -  warmup_bias_lr: 0.1
                
 
                            -
                
 
                            -  valid_metrics_list:
                
 
                            -    - _target_: super_gradients.training.metrics.DetectionMetrics
                
 
                            -      post_prediction_callback:
                
 
                            -        _target_: super_gradients.training.utils.ssd_utils.SSDPostPredictCallback
                
 
                            -        conf: 0.001
                
 
                            -        iou: 0.6
                
 
                            -      num_cls: ${arch_params.num_classes}
                
 
                            -  metric_to_watch: 'mAP@0.50:0.95'
                
 
                            -  greater_metric_to_watch_is_better: True
                
 
            # Detection dataset parameters: 320x320 train/val images, with mosaic and mixup disabled (probability 0).
            defaults:
  - coco_detection_yolox_dataset_params # base config pulled in via the defaults list; NOTE(review): confirm the keys below take precedence over it

batch_size: 32
val_batch_size: 16
val_image_size: 320
train_image_size: 320

mixup_prob: 0. # mixup disabled
degrees: 0.
shear: 0.
flip_prob: 0.5
hsv_prob: 1.0
hgain: 5
sgain: 30
vgain: 30
mosaic_scale: [0.5, 1.5] # random rescale range (keeps size by padding/cropping)
mosaic_prob: 0. # probability to apply mosaic (0 = never)
translate: 0.1 # image translation fraction
targets_format:
  _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat # targets format
  value: LABEL_NORMALIZED_CXCYWH
filter_box_candidates: True # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio; thresholds (wh_thr, ar_thr, area_thr) are not set here - presumably inherited from the base params, TODO confirm

          
@@ -22,6 +22,10 @@ mosaic_scale: [0.1, 2] # random rescale range (keeps size by padding/cropping) a
 
                             mixup_scale: [0.5, 1.5] # random rescale range for the additional sample in mixup
                
 
                             mosaic_prob: 1. # probability to apply mosaic
                
 
                             translate: 0.1 # image translation fraction
                
 
                            +filter_box_candidates: False # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
                
 
                            +wh_thr: 2 # edge size threshold when filter_box_candidates = True (pixels)
                
 
                            +ar_thr: 20 # aspect ratio threshold when filter_box_candidates = True
                
 
                            +area_thr: 0.1 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
                
 
                             targets_format:
                
 
                               _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat # targets format
                
 
            # Training hyperparameters: SGD with Nesterov momentum, cosine LR schedule with warmup, EMA enabled,
            # SSD loss, validated with DetectionMetrics (best checkpoint chosen by mAP@0.50:0.95).
            ema: True
max_epochs: 400
lr_mode: cosine
cosine_final_lr_ratio: 0.01
batch_accumulate: 1
initial_lr: 0.02
loss: ssd_loss
loss_logging_items_names: [ smooth_l1, closs, Loss ]
criterion_params:
  alpha: 1.0
  dboxes: # OVERRIDDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.

optimizer: SGD
optimizer_params:
  momentum: 0.9
  weight_decay: 0.0005
  nesterov: True
lr_warmup_epochs: 3
warmup_momentum: 0.8
warmup_initial_lr: 1e-06
warmup_bias_lr: 0.1

valid_metrics_list:
  - DetectionMetrics:
      post_prediction_callback:
        _target_: super_gradients.training.utils.ssd_utils.SSDPostPredictCallback
        conf: 0.001 # confidence threshold applied during NMS at validation time
        iou: 0.6 # IoU threshold applied during NMS at validation time
      num_cls: 80 # NOTE(review): hard-coded class count - confirm it matches arch_params.num_classes in the main recipe

metric_to_watch: 'mAP@0.50:0.95'
greater_metric_to_watch_is_better: True # higher mAP is better

          
@@ -916,7 +916,11 @@ class CocoDetectionDatasetInterfaceV2(DatasetInterface):
 
                                                                               translate=self.dataset_params.translate,
                
 
                                                                               scales=self.dataset_params.mosaic_scale,
                
 
                                                                               shear=self.dataset_params.shear,
                
 
                            -                                                  target_size=train_input_dim
                
 
                            +                                                  target_size=train_input_dim,
                
 
                            +                                                  filter_box_candidates=self.dataset_params.filter_box_candidates,
                
 
                            +                                                  wh_thr=self.dataset_params.wh_thr,
                
 
                            +                                                  area_thr=self.dataset_params.area_thr,
                
 
                            +                                                  ar_thr=self.dataset_params.ar_thr
                
 
                                                                               ),
                
 
                                                         DetectionMixup(input_dim=train_input_dim,
                
 
                                                                        mixup_scale=self.dataset_params.mixup_scale,
                
@@ -20,7 +20,9 @@ MODEL_URLS = {"regnetY800_imagenet": "https://deci-pretrained-models.s3.amazonaw
 
                                           "stdc2_seg50_cityscapes": "https://deci-pretrained-models.s3.amazonaws.com/cityscapes_stdc2_seg50_dice_edge/ckpt_best.pth",
                
 
                                           "stdc2_seg75_cityscapes": "https://deci-pretrained-models.s3.amazonaws.com/stdc2_seg75_cityscapes/ckpt_best.pth",
                
 
                                           "efficientnet_b0_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/efficientnet_b0/average_model-3.pth",
                
 
                            -              "ssd_lite_mobilenet_v2_coco": "https://deci-pretrained-models.s3.amazonaws.com/ssd_lite_mobilenet_v2/ckpt_best_anchors.pth",
                
 
                            +              "ssd_lite_mobilenet_v2_coco":
                
 
                            +                  "https://deci-pretrained-models.s3.amazonaws.com/"
                
 
                            +                  "ssd_lite_mobilenet_v2_coco_res320_new_coco_filtered_affine_scale_5_15_no_mosaic/ckpt_best.pth",
                
 
                                           "ssd_mobilenet_v1_coco": "https://deci-pretrained-models.s3.amazonaws.com/ssd_mobilenet_v1_coco_res320/ckpt_best.pth",
                
 
                                           "mobilenet_v3_large_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/mobilenetv3+large+300epoch/average_model.pth",
                
@@ -135,7 +135,7 @@ class SSDPostPredictCallback(DetectionPostPredictionCallback):
 
                                     nms_input = predictions[0]
                
 
                                     if self.nms_type == NMS_Type.ITERATIVE:
                
 
                                         nms_res = non_max_suppression(nms_input, conf_thres=self.conf, iou_thres=self.iou,
                
 
                            -                                          multi_label_per_box=self.multi_label_per_box)
                
 
                            +                                          multi_label_per_box=self.multi_label_per_box, with_confidence=True)
                
 
                                     else:
                
 
                                         nms_res = matrix_non_max_suppression(nms_input, conf_thres=self.conf,
                
 
                                                                              max_num_of_detections=self.max_predictions)
                
@@ -4,7 +4,7 @@ from super_gradients.training import MultiGPUMode
 
                             from super_gradients.training import SgModel
                
 
                             from super_gradients.training.datasets.dataset_interfaces.dataset_interface import ImageNetDatasetInterface, \
                
 
                                 ClassificationTestDatasetInterface, CityscapesDatasetInterface, SegmentationTestDatasetInterface, \
                
 
                            -    CoCoSegmentationDatasetInterface, CoCoDetectionDatasetInterface, DetectionTestDatasetInterface
                
 
                            +    CoCoSegmentationDatasetInterface, DetectionTestDatasetInterface
                
 
                             from super_gradients.training.utils.segmentation_utils import coco_sub_classes_inclusion_tuples_list
                
 
                             from super_gradients.training.metrics import Accuracy, IoU
                
 
                             import os
                
@@ -13,7 +13,6 @@ from super_gradients.training.utils.ssd_utils import SSDPostPredictCallback
 
                             from super_gradients.training.models.detection_models.ssd import DEFAULT_SSD_LITE_MOBILENET_V2_ARCH_PARAMS
                
 
                             import torchvision.transforms as transforms
                
 
                             from super_gradients.training.losses.ddrnet_loss import DDRNetLoss
                
 
                            -from super_gradients.training.utils.detection_utils import crowd_detection_collate_fn
                
 
                             from super_gradients.training.metrics import DetectionMetrics
                
 
                             from super_gradients.training.transforms.transforms import Rescale
                
 
                             from super_gradients.training.losses.stdc_loss import STDCLoss
                
@@ -111,22 +110,49 @@ class PretrainedModelsTest(unittest.TestCase):
 
                                                                                                  "cache_train_images": False,
                
 
                                                                                                  "cache_val_images": False,
                
 
                                                                                                  "targets_format": DetectionTargetsFormat.LABEL_CXCYWH,
                
 
                            -                                                                     "with_crowd": True
                
 
                            +                                                                     "with_crowd": True,
                
 
                            +                                                                     "filter_box_candidates": False,
                
 
                            +                                                                     "wh_thr": 0,
                
 
                            +                                                                     "ar_thr": 0,
                
 
                            +                                                                     "area_thr": 0
                
 
                                                                                                  }),
                
 
                            -            'ssd_mobilenet': CoCoDetectionDatasetInterface(
                
 
                            -                dataset_params={
                
 
                            -                    "batch_size": 32,
                
 
                            -                    "val_batch_size": 32,
                
 
                            -                    "train_image_size": 320,
                
 
                            -                    "val_image_size": 320,
                
 
                            -                    "val_collate_fn": crowd_detection_collate_fn,
                
 
                            -                    "val_sample_loading_method": "default",
                
 
                            -                    "with_crowd": True
                
 
                            -                }
                
 
                            -            ),
                
 
                            +
                
 
                            +            'ssd_mobilenet': CocoDetectionDatasetInterfaceV2(dataset_params={"data_dir": "/data/coco",
                
 
                            +                                                                             "train_subdir": "images/train2017",
                
 
                            +                                                                             "val_subdir": "images/val2017",
                
 
                            +                                                                             "train_json_file": "instances_train2017.json",
                
 
                            +                                                                             "val_json_file": "instances_val2017.json",
                
 
                            +                                                                             "batch_size": 16,
                
 
                            +                                                                             "val_batch_size": 128,
                
 
                            +                                                                             "val_image_size": 320,
                
 
                            +                                                                             "train_image_size": 320,
                
 
                            +                                                                             "hgain": 5,
                
 
                            +                                                                             "sgain": 30,
                
 
                            +                                                                             "vgain": 30,
                
 
                            +                                                                             "mixup_prob": .0,
                
 
                            +                                                                             "degrees": 0.,
                
 
                            +                                                                             "shear": 0.,
                
 
                            +                                                                             "flip_prob": 0.,
                
 
                            +                                                                             "hsv_prob": 0.,
                
 
                            +                                                                             "mosaic_scale": [0.5, 1.5],
                
 
                            +                                                                             "mixup_scale": [0.5, 1.5],
                
 
                            +                                                                             "mosaic_prob": 1.,
                
 
                            +                                                                             "translate": 0.1,
                
 
                            +                                                                             "val_collate_fn": CrowdDetectionCollateFN(),
                
 
                            +                                                                             "train_collate_fn": DetectionCollateFN(),
                
 
                            +                                                                             "cache_dir_path": None,
                
 
                            +                                                                             "cache_train_images": False,
                
 
                            +                                                                             "cache_val_images": False,
                
 
                            +                                                                             "targets_format": DetectionTargetsFormat.LABEL_NORMALIZED_CXCYWH,
                
 
                            +                                                                             "with_crowd": True,
                
 
                            +                                                                             "filter_box_candidates": True,
                
 
                            +                                                                             "wh_thr": 2,
                
 
                            +                                                                             "ar_thr": 20,
                
 
                            +                                                                             "area_thr": 0.1
                
 
                            +                                                                             })
                
 
                                     }
                
 
                            -        self.coco_pretrained_maps = {'ssd_lite_mobilenet_v2': 0.215,
                
 
                            +        self.coco_pretrained_maps = {'ssd_lite_mobilenet_v2': 0.2052,
                
 
                                                                  'coco_ssd_mobilenet_v1': 0.243,
                
 
                                                                  "yolox_s": 0.4047,
                
 
                                                                  "yolox_m": 0.4640,
                
@@ -514,8 +540,7 @@ class PretrainedModelsTest(unittest.TestCase):
 
                                                         checkpoint_params=self.coco_pretrained_ckpt_params)
                
 
                                     ssd_post_prediction_callback = SSDPostPredictCallback()
                
 
                                     res = trainer.test(test_loader=self.coco_dataset['ssd_mobilenet'].val_loader,
                
 
                            -                           test_metrics_list=[DetectionMetrics(post_prediction_callback=ssd_post_prediction_callback,
                
 
                            -                                                                 num_cls=len(self.coco_dataset['ssd_mobilenet'].coco_classes))],
                
 
                            +                           test_metrics_list=[DetectionMetrics(post_prediction_callback=ssd_post_prediction_callback, num_cls=80)],
                
 
                                                        metrics_progress_verbose=True)[2]
                
 
                                     self.assertAlmostEqual(res, self.coco_pretrained_maps["ssd_lite_mobilenet_v2"], delta=0.001)