Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

#292 Feature/sg 172 reproduce ssd with new coco dataset interface

Merged
Shay Aharon merged 1 commit into Deci-AI:master from deci-ai:feature/SG-000_reproduce_ssd_with_new_coco_2nd_try
@@ -1,17 +1,16 @@
 # SSD MobileNetV2 Detection training on CoCo2017 Dataset:
 # SSD MobileNetV2 Detection training on CoCo2017 Dataset:
-# Trained in 320x320 mAP@0.5@0.95 (COCO API, confidence 0.001, IoU threshold 0.6, test on 320x320 images) ~21.5
-# Checkpoint path: https://deci-pretrained-models.s3.amazonaws.com/ssd_lite_mobilenet_v2/ckpt_best_anchors.pth
+# Trained in 320x320 mAP@0.5:0.95 (COCO API, confidence 0.001, IoU threshold 0.6, test on 320x320 images) ~20.52
+# Checkpoint path: https://deci-pretrained-models.s3.amazonaws.com/ssd_lite_mobilenet_v2_coco_res320_new_coco_filtered_affine_scale_5_15_no_mosaic/ckpt_best.pth
 # (trained with stride_16_plus_big)
 # (trained with stride_16_plus_big)
-# Hardware: 4 NVIDIA RTX A5000
-# Training time: ±16 hours
+# Hardware: 8 NVIDIA RTX 3090
+# Training time: ±17 hours
 
 
 # Instructions:
 # Instructions:
 # Set the PYTHONPATH environment variable: (Replace "YOUR_LOCAL_PATH" with the path to the downloaded repo):
 # Set the PYTHONPATH environment variable: (Replace "YOUR_LOCAL_PATH" with the path to the downloaded repo):
-#   export PYTHONPATH="YOUR_LOCAL_PATH"/super_gradients/
-
+# export PYTHONPATH="YOUR_LOCAL_PATH"/super_gradients/src:"YOUR_LOCAL_PATH"/super_gradients/
+#
 # Run with:
 # Run with:
-# python3 -m torch.distributed.launch --nproc_per_node=4 train_from_recipe_example/train_from_recipe.py \
-# --config-name=coco_ssd_lite_mobilenet_v2.yaml
+# python3 -m torch.distributed.launch --nproc_per_node=8 train_from_recipe.py --config-name=coco2017_ssd_lite_mobilenet_v2.yaml
 
 
 
 
 # NOTE:
 # NOTE:
@@ -23,20 +22,13 @@
 
 
 
 
 defaults:
 defaults:
-  - training_hyperparams: default_train_params
-  - dataset_params: coco_detection_dataset_params
+  - training_hyperparams: coco2017_ssd_lite_mobilenet_v2_train_params
+  - dataset_params: coco_detection_ssd_lite_mobilenet_v2_dataset_params
   - arch_params: default_arch_params
   - arch_params: default_arch_params
   - checkpoint_params: default_checkpoint_params
   - checkpoint_params: default_checkpoint_params
   - anchors: ssd_anchors
   - anchors: ssd_anchors
 
 
 architecture: ssd_lite_mobilenet_v2
 architecture: ssd_lite_mobilenet_v2
-project_name: SSD_Mobile
-
-dataset_params:
-  batch_size: 64
-  val_batch_size: 64
-  val_image_size: 320
-  train_image_size: 320
 
 
 data_loader_num_workers: 8
 data_loader_num_workers: 8
 model_checkpoints_location: local
 model_checkpoints_location: local
@@ -47,7 +39,7 @@ sg_model:
   _target_: super_gradients.SgModel
   _target_: super_gradients.SgModel
   experiment_name: ${experiment_name}
   experiment_name: ${experiment_name}
   model_checkpoints_location: ${model_checkpoints_location}
   model_checkpoints_location: ${model_checkpoints_location}
-  multi_gpu: AUTO
+  multi_gpu: DDP
 
 
 anchors_resolution: ${dataset_params.val_image_size}x${dataset_params.val_image_size}
 anchors_resolution: ${dataset_params.val_image_size}x${dataset_params.val_image_size}
 anchors_name: stride_16_plus_big
 anchors_name: stride_16_plus_big
@@ -58,40 +50,12 @@ arch_params:
   anchors: ${dboxes}
   anchors: ${dboxes}
 
 
 dataset_interface:
 dataset_interface:
-  coco2017_detection:
+  coco2017_detection_v2:
     dataset_params: ${dataset_params}
     dataset_params: ${dataset_params}
 
 
 training_hyperparams:
 training_hyperparams:
-  ema: True
-  anchors_name: ${anchors_name}
-  max_epochs: 400
-  lr_mode: cosine
-  cosine_final_lr_ratio: 0.01
-  batch_accumulate: 1
-  initial_lr: 0.02
-  loss: ssd_loss
-  loss_logging_items_names: [ smooth_l1, closs, Loss ]
   criterion_params:
   criterion_params:
     alpha: 1.0
     alpha: 1.0
     dboxes: ${dboxes}
     dboxes: ${dboxes}
 
 
-  optimizer: SGD
-  optimizer_params:
-    momentum: 0.9
-    weight_decay: 0.0005
-    nesterov: True
-  lr_warmup_epochs: 3
-  warmup_momentum: 0.8
-  warmup_initial_lr: 1e-06
-  warmup_bias_lr: 0.1
-
-  valid_metrics_list:
-    - _target_: super_gradients.training.metrics.DetectionMetrics
-      post_prediction_callback:
-        _target_: super_gradients.training.utils.ssd_utils.SSDPostPredictCallback
-        conf: 0.001
-        iou: 0.6
-      num_cls: ${arch_params.num_classes}
 
 
-  metric_to_watch: 'mAP@0.50:0.95'
-  greater_metric_to_watch_is_better: True
Discard
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
  1. defaults:
  2. - coco_detection_yolox_dataset_params
  3. batch_size: 32
  4. val_batch_size: 16
  5. val_image_size: 320
  6. train_image_size: 320
  7. mixup_prob: 0.
  8. degrees: 0.
  9. shear: 0.
  10. flip_prob: 0.5
  11. hsv_prob: 1.0
  12. hgain: 5
  13. sgain: 30
  14. vgain: 30
  15. mosaic_scale: [0.5, 1.5]
  16. mosaic_prob: 0.
  17. translate: 0.1
  18. targets_format:
  19. _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat
  20. value: LABEL_NORMALIZED_CXCYWH
  21. filter_box_candidates: True
Discard
@@ -22,6 +22,10 @@ mosaic_scale: [0.1, 2] # random rescale range (keeps size by padding/cropping) a
 mixup_scale: [0.5, 1.5] # random rescale range for the additional sample in mixup
 mixup_scale: [0.5, 1.5] # random rescale range for the additional sample in mixup
 mosaic_prob: 1. # probability to apply mosaic
 mosaic_prob: 1. # probability to apply mosaic
 translate: 0.1 # image translation fraction
 translate: 0.1 # image translation fraction
+filter_box_candidates: False # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
+wh_thr: 2 # edge size threshold when filter_box_candidates = True (pixels)
+ar_thr: 20 # aspect ratio threshold when filter_box_candidates = True
+area_thr: 0.1 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
 
 
 targets_format:
 targets_format:
   _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat # targets format
   _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat # targets format
Discard
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
  1. ema: True
  2. max_epochs: 400
  3. lr_mode: cosine
  4. cosine_final_lr_ratio: 0.01
  5. batch_accumulate: 1
  6. initial_lr: 0.02
  7. loss: ssd_loss
  8. loss_logging_items_names: [ smooth_l1, closs, Loss ]
  9. criterion_params:
  10. alpha: 1.0
  11. dboxes: # OVERRIDDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.
  12. optimizer: SGD
  13. optimizer_params:
  14. momentum: 0.9
  15. weight_decay: 0.0005
  16. nesterov: True
  17. lr_warmup_epochs: 3
  18. warmup_momentum: 0.8
  19. warmup_initial_lr: 1e-06
  20. warmup_bias_lr: 0.1
  21. valid_metrics_list:
  22. - DetectionMetrics:
  23. post_prediction_callback:
  24. _target_: super_gradients.training.utils.ssd_utils.SSDPostPredictCallback
  25. conf: 0.001
  26. iou: 0.6
  27. num_cls: 80
  28. metric_to_watch: 'mAP@0.50:0.95'
  29. greater_metric_to_watch_is_better: True
Discard
@@ -916,7 +916,11 @@ class CocoDetectionDatasetInterfaceV2(DatasetInterface):
                                                   translate=self.dataset_params.translate,
                                                   translate=self.dataset_params.translate,
                                                   scales=self.dataset_params.mosaic_scale,
                                                   scales=self.dataset_params.mosaic_scale,
                                                   shear=self.dataset_params.shear,
                                                   shear=self.dataset_params.shear,
-                                                  target_size=train_input_dim
+                                                  target_size=train_input_dim,
+                                                  filter_box_candidates=self.dataset_params.filter_box_candidates,
+                                                  wh_thr=self.dataset_params.wh_thr,
+                                                  area_thr=self.dataset_params.area_thr,
+                                                  ar_thr=self.dataset_params.ar_thr
                                                   ),
                                                   ),
                             DetectionMixup(input_dim=train_input_dim,
                             DetectionMixup(input_dim=train_input_dim,
                                            mixup_scale=self.dataset_params.mixup_scale,
                                            mixup_scale=self.dataset_params.mixup_scale,
Discard
@@ -20,7 +20,9 @@ MODEL_URLS = {"regnetY800_imagenet": "https://deci-pretrained-models.s3.amazonaw
               "stdc2_seg50_cityscapes": "https://deci-pretrained-models.s3.amazonaws.com/cityscapes_stdc2_seg50_dice_edge/ckpt_best.pth",
               "stdc2_seg50_cityscapes": "https://deci-pretrained-models.s3.amazonaws.com/cityscapes_stdc2_seg50_dice_edge/ckpt_best.pth",
               "stdc2_seg75_cityscapes": "https://deci-pretrained-models.s3.amazonaws.com/stdc2_seg75_cityscapes/ckpt_best.pth",
               "stdc2_seg75_cityscapes": "https://deci-pretrained-models.s3.amazonaws.com/stdc2_seg75_cityscapes/ckpt_best.pth",
               "efficientnet_b0_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/efficientnet_b0/average_model-3.pth",
               "efficientnet_b0_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/efficientnet_b0/average_model-3.pth",
-              "ssd_lite_mobilenet_v2_coco": "https://deci-pretrained-models.s3.amazonaws.com/ssd_lite_mobilenet_v2/ckpt_best_anchors.pth",
+              "ssd_lite_mobilenet_v2_coco":
+                  "https://deci-pretrained-models.s3.amazonaws.com/"
+                  "ssd_lite_mobilenet_v2_coco_res320_new_coco_filtered_affine_scale_5_15_no_mosaic/ckpt_best.pth",
               "ssd_mobilenet_v1_coco": "https://deci-pretrained-models.s3.amazonaws.com/ssd_mobilenet_v1_coco_res320/ckpt_best.pth",
               "ssd_mobilenet_v1_coco": "https://deci-pretrained-models.s3.amazonaws.com/ssd_mobilenet_v1_coco_res320/ckpt_best.pth",
 
 
               "mobilenet_v3_large_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/mobilenetv3+large+300epoch/average_model.pth",
               "mobilenet_v3_large_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/mobilenetv3+large+300epoch/average_model.pth",
Discard
@@ -135,7 +135,7 @@ class SSDPostPredictCallback(DetectionPostPredictionCallback):
         nms_input = predictions[0]
         nms_input = predictions[0]
         if self.nms_type == NMS_Type.ITERATIVE:
         if self.nms_type == NMS_Type.ITERATIVE:
             nms_res = non_max_suppression(nms_input, conf_thres=self.conf, iou_thres=self.iou,
             nms_res = non_max_suppression(nms_input, conf_thres=self.conf, iou_thres=self.iou,
-                                          multi_label_per_box=self.multi_label_per_box)
+                                          multi_label_per_box=self.multi_label_per_box, with_confidence=True)
         else:
         else:
             nms_res = matrix_non_max_suppression(nms_input, conf_thres=self.conf,
             nms_res = matrix_non_max_suppression(nms_input, conf_thres=self.conf,
                                                  max_num_of_detections=self.max_predictions)
                                                  max_num_of_detections=self.max_predictions)
Discard
@@ -4,7 +4,7 @@ from super_gradients.training import MultiGPUMode
 from super_gradients.training import SgModel
 from super_gradients.training import SgModel
 from super_gradients.training.datasets.dataset_interfaces.dataset_interface import ImageNetDatasetInterface, \
 from super_gradients.training.datasets.dataset_interfaces.dataset_interface import ImageNetDatasetInterface, \
     ClassificationTestDatasetInterface, CityscapesDatasetInterface, SegmentationTestDatasetInterface, \
     ClassificationTestDatasetInterface, CityscapesDatasetInterface, SegmentationTestDatasetInterface, \
-    CoCoSegmentationDatasetInterface, CoCoDetectionDatasetInterface, DetectionTestDatasetInterface
+    CoCoSegmentationDatasetInterface, DetectionTestDatasetInterface
 from super_gradients.training.utils.segmentation_utils import coco_sub_classes_inclusion_tuples_list
 from super_gradients.training.utils.segmentation_utils import coco_sub_classes_inclusion_tuples_list
 from super_gradients.training.metrics import Accuracy, IoU
 from super_gradients.training.metrics import Accuracy, IoU
 import os
 import os
@@ -13,7 +13,6 @@ from super_gradients.training.utils.ssd_utils import SSDPostPredictCallback
 from super_gradients.training.models.detection_models.ssd import DEFAULT_SSD_LITE_MOBILENET_V2_ARCH_PARAMS
 from super_gradients.training.models.detection_models.ssd import DEFAULT_SSD_LITE_MOBILENET_V2_ARCH_PARAMS
 import torchvision.transforms as transforms
 import torchvision.transforms as transforms
 from super_gradients.training.losses.ddrnet_loss import DDRNetLoss
 from super_gradients.training.losses.ddrnet_loss import DDRNetLoss
-from super_gradients.training.utils.detection_utils import crowd_detection_collate_fn
 from super_gradients.training.metrics import DetectionMetrics
 from super_gradients.training.metrics import DetectionMetrics
 from super_gradients.training.transforms.transforms import Rescale
 from super_gradients.training.transforms.transforms import Rescale
 from super_gradients.training.losses.stdc_loss import STDCLoss
 from super_gradients.training.losses.stdc_loss import STDCLoss
@@ -111,22 +110,49 @@ class PretrainedModelsTest(unittest.TestCase):
                                                                      "cache_train_images": False,
                                                                      "cache_train_images": False,
                                                                      "cache_val_images": False,
                                                                      "cache_val_images": False,
                                                                      "targets_format": DetectionTargetsFormat.LABEL_CXCYWH,
                                                                      "targets_format": DetectionTargetsFormat.LABEL_CXCYWH,
-                                                                     "with_crowd": True
+                                                                     "with_crowd": True,
+                                                                     "filter_box_candidates": False,
+                                                                     "wh_thr": 0,
+                                                                     "ar_thr": 0,
+                                                                     "area_thr": 0
                                                                      }),
                                                                      }),
-            'ssd_mobilenet': CoCoDetectionDatasetInterface(
-                dataset_params={
-                    "batch_size": 32,
-                    "val_batch_size": 32,
-                    "train_image_size": 320,
-                    "val_image_size": 320,
-                    "val_collate_fn": crowd_detection_collate_fn,
-                    "val_sample_loading_method": "default",
-                    "with_crowd": True
-                }
-            ),
+
+            'ssd_mobilenet': CocoDetectionDatasetInterfaceV2(dataset_params={"data_dir": "/data/coco",
+                                                                             "train_subdir": "images/train2017",
+                                                                             "val_subdir": "images/val2017",
+                                                                             "train_json_file": "instances_train2017.json",
+                                                                             "val_json_file": "instances_val2017.json",
+                                                                             "batch_size": 16,
+                                                                             "val_batch_size": 128,
+                                                                             "val_image_size": 320,
+                                                                             "train_image_size": 320,
+                                                                             "hgain": 5,
+                                                                             "sgain": 30,
+                                                                             "vgain": 30,
+                                                                             "mixup_prob": .0,
+                                                                             "degrees": 0.,
+                                                                             "shear": 0.,
+                                                                             "flip_prob": 0.,
+                                                                             "hsv_prob": 0.,
+                                                                             "mosaic_scale": [0.5, 1.5],
+                                                                             "mixup_scale": [0.5, 1.5],
+                                                                             "mosaic_prob": 1.,
+                                                                             "translate": 0.1,
+                                                                             "val_collate_fn": CrowdDetectionCollateFN(),
+                                                                             "train_collate_fn": DetectionCollateFN(),
+                                                                             "cache_dir_path": None,
+                                                                             "cache_train_images": False,
+                                                                             "cache_val_images": False,
+                                                                             "targets_format": DetectionTargetsFormat.LABEL_NORMALIZED_CXCYWH,
+                                                                             "with_crowd": True,
+                                                                             "filter_box_candidates": True,
+                                                                             "wh_thr": 2,
+                                                                             "ar_thr": 20,
+                                                                             "area_thr": 0.1
+                                                                             })
         }
         }
 
 
-        self.coco_pretrained_maps = {'ssd_lite_mobilenet_v2': 0.215,
+        self.coco_pretrained_maps = {'ssd_lite_mobilenet_v2': 0.2052,
                                      'coco_ssd_mobilenet_v1': 0.243,
                                      'coco_ssd_mobilenet_v1': 0.243,
                                      "yolox_s": 0.4047,
                                      "yolox_s": 0.4047,
                                      "yolox_m": 0.4640,
                                      "yolox_m": 0.4640,
@@ -514,8 +540,7 @@ class PretrainedModelsTest(unittest.TestCase):
                             checkpoint_params=self.coco_pretrained_ckpt_params)
                             checkpoint_params=self.coco_pretrained_ckpt_params)
         ssd_post_prediction_callback = SSDPostPredictCallback()
         ssd_post_prediction_callback = SSDPostPredictCallback()
         res = trainer.test(test_loader=self.coco_dataset['ssd_mobilenet'].val_loader,
         res = trainer.test(test_loader=self.coco_dataset['ssd_mobilenet'].val_loader,
-                           test_metrics_list=[DetectionMetrics(post_prediction_callback=ssd_post_prediction_callback,
-                                                                 num_cls=len(self.coco_dataset['ssd_mobilenet'].coco_classes))],
+                           test_metrics_list=[DetectionMetrics(post_prediction_callback=ssd_post_prediction_callback, num_cls=80)],
                            metrics_progress_verbose=True)[2]
                            metrics_progress_verbose=True)[2]
         self.assertAlmostEqual(res, self.coco_pretrained_maps["ssd_lite_mobilenet_v2"], delta=0.001)
         self.assertAlmostEqual(res, self.coco_pretrained_maps["ssd_lite_mobilenet_v2"], delta=0.001)
 
 
Discard