#581 Bug/sg 512 shuffle bugfix in recipe dataloaders

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:bug/SG-512_shuffle_bugfix_in_recipe_datalaoders
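Training dataloaders built from recipe configs were apparently not being shuffled: the recipe YAMLs never set `shuffle`, and the flag cannot simply be passed through to the `DataLoader` in the distributed case, because PyTorch forbids combining a custom `sampler` with `shuffle`. This PR adds `shuffle: True` to `train_dataloader_params` in the affected recipes and reworks `_process_sampler_params` so that user params are merged with the defaults up front and `shuffle` is routed into the sampler's kwargs. A minimal, self-contained sketch of the underlying PyTorch constraint (toy dataset, not part of this PR):

```python
# Sketch of the DataLoader constraint this PR works around: `shuffle` and a
# custom `sampler` are mutually exclusive in DataLoader.__init__.
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.arange(16).float())

# Single-process case: `shuffle` can be given to the DataLoader directly.
plain_loader = DataLoader(dataset, batch_size=4, shuffle=True)

# Distributed case: shuffling must be expressed on the sampler itself.
# (num_replicas/rank are pinned so the sketch runs without dist.init_process_group.)
sampler = DistributedSampler(dataset, num_replicas=1, rank=0, shuffle=True)
dist_loader = DataLoader(dataset, batch_size=4, sampler=sampler)

# Passing both would fail:
# DataLoader(dataset, batch_size=4, sampler=sampler, shuffle=True)
# -> ValueError: sampler option is mutually exclusive with shuffle
```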
@@ -20,6 +20,7 @@ train_dataset_params:
  download: True

 train_dataloader_params:
+  shuffle: True
  batch_size: 256
  num_workers: 8
  drop_last: False
@@ -32,6 +32,7 @@ train_dataset_params:
  download: True

 train_dataloader_params:
+  shuffle: True
  batch_size: 256
  num_workers: 8
  drop_last: False
@@ -25,6 +25,7 @@ train_dataset_params:
  download: True

 train_dataloader_params:
+  shuffle: True
  batch_size: 256
  num_workers: 8
  drop_last: False
@@ -15,6 +15,7 @@ val_dataset_params:
  transforms:

 train_dataloader_params:
+  shuffle: True
  batch_size: 8
  num_workers: 8
  drop_last: True                 # drop the last incomplete batch, if dataset size is not divisible by the batch size
@@ -22,4 +23,4 @@ train_dataloader_params:
 val_dataloader_params:
  batch_size: 8
  num_workers: 8
-  drop_last: False
+  drop_last: False
@@ -44,6 +44,7 @@ train_dataset_params:
  with_crowd: False

 train_dataloader_params:
+  shuffle: True
  batch_size: 16
  num_workers: 8
  batch_sampler: True
@@ -87,4 +88,4 @@ val_dataloader_params:
    _target_: super_gradients.training.utils.detection_utils.DetectionCollateFN


-_convert_: all
+_convert_: all
@@ -1,68 +1,6 @@
 defaults:
   - coco_detection_dataset_params

-cache_dir: # path to a directory that will be used for caching (with numpy.memmap).
-cache_train_images: False
-cache_val_images: False
-
-batch_size: 32
-val_batch_size: 16
-train_image_size: 320
-val_image_size: 320
-train_input_dim:
-  - ${dataset_params.train_image_size}
-  - ${dataset_params.train_image_size}
-val_input_dim:
-  - ${dataset_params.val_image_size}
-  - ${dataset_params.val_image_size}
-
-targets_format:
-  _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat
-  value: LABEL_NORMALIZED_CXCYWH
-
-train_transforms:
-  - _target_: super_gradients.training.transforms.transforms.DetectionRandomAffine
-    degrees: 0.                   # rotation degrees, randomly sampled from [-degrees, degrees]
-    translate: 0.1                # image translation fraction
-    scales: [0.5, 1.5]            # random rescale range (keeps size by padding/cropping) after mosaic transform.
-    shear: 0.                     # shear degrees, randomly sampled from [-degrees, degrees]
-    target_size: ${dataset_params.train_input_dim}
-    filter_box_candidates: True   # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
-    wh_thr: 2                     # edge size threshold when filter_box_candidates = True (pixels)
-    area_thr: 0.1                 # threshold for area ratio between original image and the transformed one, when when filter_box_candidates = True
-    ar_thr: 20                    # aspect ratio threshold when filter_box_candidates = True
-  - _target_: super_gradients.training.transforms.transforms.DetectionHSV
-    prob: 1.0                       # probability to apply HSV transform
-    hgain: 5                        # HSV transform hue gain (randomly sampled from [-hgain, hgain])
-    sgain: 30                       # HSV transform saturation gain (randomly sampled from [-sgain, sgain])
-    vgain: 30                       # HSV transform value gain (randomly sampled from [-vgain, vgain])
-  - _target_: super_gradients.training.transforms.transforms.DetectionHorizontalFlip
-    prob: 0.5                       # probability to apply horizontal flip
-  - _target_: super_gradients.training.transforms.transforms.DetectionPaddedRescale
-    input_dim: ${dataset_params.train_input_dim}
-    max_targets: 120
-  - _target_: super_gradients.training.transforms.transforms.DetectionTargetsFormatTransform
-    output_format: ${dataset_params.targets_format}
-
-val_transforms:
-  - _target_: super_gradients.training.transforms.transforms.DetectionPaddedRescale
-    input_dim: ${dataset_params.val_input_dim}
-  - _target_: super_gradients.training.transforms.transforms.DetectionTargetsFormatTransform
-    max_targets: 50
-    output_format: ${dataset_params.targets_format}
-
-val_collate_fn: # collate function for valset
-  _target_: super_gradients.training.utils.detection_utils.DetectionCollateFN
-train_collate_fn: # collate function for trainset
-  _target_: super_gradients.training.utils.detection_utils.DetectionCollateFN
-
-class_inclusion_list: # If not None,every class not included will be ignored.
-train_max_num_samples: # If not None, only specified number of samples will be loaded in train dataset
-val_max_num_samples:   # If not None, only specified number of samples will be loaded in test dataset
-with_crowd: False # whether to return "crowd" labels in validation
-
-# TODO: REMOVE ABOVE, HERE FOR COMPATIBILITY UNTIL WE REMOVE DATASET_INTERFACE
-
 train_dataset_params:
   data_dir: /data/coco # root path to coco data
   subdir: images/train2017 # sub directory path of data_dir containing the train data.
@@ -144,4 +82,4 @@ val_dataloader_params:
   collate_fn: # collate function for trainset
     _target_: super_gradients.training.utils.detection_utils.DetectionCollateFN

-_convert_: all
+_convert_: all
@@ -43,6 +43,7 @@ val_dataset_params:
        mode: center

 train_dataloader_params:
+  shuffle: True
  batch_size: 8
  num_workers: 8
  drop_last: True                 # drop the last incomplete batch, if dataset size is not divisible by the batch size
@@ -27,6 +27,7 @@ val_dataset_params:
        std: ${dataset_params.img_std}

 train_dataloader_params:
+  shuffle: True
  batch_size: 64
  num_workers: 8
  drop_last: False
@@ -38,4 +39,4 @@ val_dataloader_params:
  drop_last: False
  pin_memory: True

-_convert_: all
+_convert_: all
@@ -35,6 +35,7 @@ val_dataset_params:


 train_dataloader_params:
+  shuffle: True
  batch_size: 16
  num_workers: 8
  batch_sampler: True
@@ -50,6 +50,7 @@ val_dataset_params:


 train_dataloader_params:
+  shuffle: True
  batch_size: 16
  num_workers: 8
  drop_last: True
@@ -30,6 +30,7 @@ val_dataset_params:
        w: 320

 train_dataloader_params:
+  shuffle: True
  batch_size: 8
  drop_last: True

@@ -37,4 +38,4 @@ val_dataloader_params:
  batch_size: 8
  drop_last: False

-_convert_: all
+_convert_: all
@@ -105,27 +105,28 @@ def _process_dataloader_params(cfg, dataloader_params, dataset, train):

 def _process_sampler_params(dataloader_params, dataset, default_dataloader_params):
     is_dist = super_gradients.is_distributed()
+    dataloader_params = override_default_params_without_nones(dataloader_params, default_dataloader_params)
     if get_param(dataloader_params, "sampler") is not None:
         dataloader_params = _instantiate_sampler(dataset, dataloader_params)
-    elif get_param(default_dataloader_params, "sampler") is not None:
-        default_dataloader_params = _instantiate_sampler(dataset, default_dataloader_params)
     elif is_dist:
-        default_dataloader_params["sampler"] = {"DistributedSampler": {}}
-        default_dataloader_params = _instantiate_sampler(dataset, default_dataloader_params)
-    dataloader_params = override_default_params_without_nones(dataloader_params, default_dataloader_params)
+        dataloader_params["sampler"] = {"DistributedSampler": {}}
+        dataloader_params = _instantiate_sampler(dataset, dataloader_params)
     if get_param(dataloader_params, "batch_sampler"):
         sampler = dataloader_params.pop("sampler")
         batch_size = dataloader_params.pop("batch_size")
         if "drop_last" in dataloader_params:
             drop_last = dataloader_params.pop("drop_last")
         else:
-            drop_last = default_dataloader_params["drop_last"]
+            drop_last = dataloader_params["drop_last"]
         dataloader_params["batch_sampler"] = BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=drop_last)
     return dataloader_params


 def _instantiate_sampler(dataset, dataloader_params):
     sampler_name = list(dataloader_params["sampler"].keys())[0]
+    if "shuffle" in dataloader_params.keys():
+        # SHUFFLE IS MUTUALLY EXCLUSIVE WITH SAMPLER ARG IN DATALOADER INIT
+        dataloader_params["sampler"][sampler_name]["shuffle"] = dataloader_params.pop("shuffle")
     dataloader_params["sampler"][sampler_name]["dataset"] = dataset
     dataloader_params["sampler"] = SamplersFactory().get(dataloader_params["sampler"])
     return dataloader_params
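For illustration, here is a toy, standalone reproduction of the dict rewrite that `_instantiate_sampler` now performs (hypothetical snippet, not the library code): the top-level `shuffle` flag is popped from the dataloader params and nested under the chosen sampler's kwargs, so it ends up on e.g. `DistributedSampler(shuffle=...)` rather than on `DataLoader(shuffle=...)`.

```python
# Toy reproduction of the params rewrite added in this PR (illustrative only;
# the keys mirror the diff, the surrounding factory machinery is omitted).
params = {"sampler": {"DistributedSampler": {}}, "shuffle": True, "batch_size": 16}

sampler_name = list(params["sampler"].keys())[0]
if "shuffle" in params:
    # shuffle is mutually exclusive with the sampler arg in DataLoader.__init__,
    # so it is moved into the sampler's own kwargs instead
    params["sampler"][sampler_name]["shuffle"] = params.pop("shuffle")

print(params)
# -> {'sampler': {'DistributedSampler': {'shuffle': True}}, 'batch_size': 16}
```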