yolo_v5_coco2017.py

# Yolo v5 Detection training on the COCO2017 Dataset:
# Yolo v5s trained on 320x320: mAP@0.5-0.95 (confidence 0.001, tested on 320x320 images) ~28.4
# Yolo v5s trained on 640x640: mAP@0.5-0.95 (confidence 0.001, tested on 320x320 images) ~29.1

# Yolo v5 Detection training on the COCO2014 Dataset:
# Yolo v5s trained on 320x320: mAP@0.5-0.95 (confidence 0.001, tested on 320x320 images) ~28.77

# The batch size may need to change depending on the model size and GPU (2080Ti, V100).
# The code is optimized for running with a mini-batch of 64 examples, so depending on the number of GPUs,
# you should set the "batch_accumulate" param in the training_params dict such that
# batch_size * gpu_num * batch_accumulate = 64.
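# For example (hypothetical numbers, only illustrating the rule above): with 2 GPUs
# and batch_size=8 per device, set batch_accumulate=4 so that 8 * 2 * 4 = 64;
# with 4 GPUs and batch_size=16, batch_accumulate=1 already gives 16 * 4 * 1 = 64.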

import pkg_resources
import torch
import hydra
from omegaconf import DictConfig

import super_gradients
from super_gradients.training.sg_model import MultiGPUMode
from super_gradients.common.abstractions.abstract_logger import get_logger

def scale_params(cfg):
    """
    Scale:
        * learning rate,
        * weight decay,
        * box_loss_gain,
        * cls_loss_gain,
        * obj_loss_gain
    according to:
        * effective batch size
        * DDP world size
        * image size
        * num YOLO output layers
        * num classes
    """
    logger = get_logger(__name__)

    # Scale LR and weight decay
    is_ddp = cfg.sg_model.multi_gpu == MultiGPUMode.DISTRIBUTED_DATA_PARALLEL and torch.distributed.is_initialized()
    world_size = torch.distributed.get_world_size() if is_ddp else 1

    # Scale LR and WD for DDP due to gradients being averaged between devices
    # Equivalent to loss * WORLD_SIZE in Ultralytics
    cfg.training_params.initial_lr *= world_size
    cfg.training_params.warmup_bias_lr *= world_size
    cfg.training_params.optimizer_params.weight_decay /= world_size
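    # E.g. (hypothetical values): with world_size=4, initial_lr=0.01 becomes 0.04
    # and weight_decay=0.0005 becomes 0.000125, before the batch-size scaling below.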

    # Scale WD with a factor of [effective batch size] / 64
    batch_size, batch_accumulate = cfg.dataset_params.batch_size, cfg.training_params.batch_accumulate
    batch_size_factor = cfg.sg_model.num_devices if is_ddp else cfg.sg_model.dataset_interface.batch_size_factor
    effective_batch_size = batch_size * batch_size_factor * batch_accumulate
    cfg.training_params.optimizer_params.weight_decay *= effective_batch_size / 64.

    # Scale EMA beta to match the Ultralytics update
    cfg.training_params.ema_params.beta = cfg.training_params.max_epochs * len(cfg.sg_model.train_loader) / 2000.
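    # E.g. (hypothetical values): 300 epochs at ~1850 iterations per epoch gives
    # beta ~= 300 * 1850 / 2000 ~= 277.5, i.e. the total number of training updates
    # over the 2000-step time constant used in the Ultralytics EMA ramp-up.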

    log_msg = \
        f"""
        IMPORTANT:\n
        Training with world size of {world_size}, {'DDP' if is_ddp else 'no DDP'}, effective batch size of {effective_batch_size},
        scaled:
            * initial_lr to {cfg.training_params.initial_lr};
            * warmup_bias_lr to {cfg.training_params.warmup_bias_lr};
            * weight_decay to {cfg.training_params.optimizer_params.weight_decay};
            * EMA beta to {cfg.training_params.ema_params.beta};
        """

    if cfg.training_params.loss == 'yolo_v5_loss':
        # Scale loss gains
        model = cfg.sg_model.net
        model = model.module if hasattr(model, 'module') else model
        num_levels = model._head._modules_list[-1].detection_layers_num
        train_image_size = cfg.dataset_params.train_image_size

        num_branches_norm = 3. / num_levels
        num_classes_norm = len(cfg.sg_model.classes) / 80.
        image_size_norm = train_image_size / 640.
        cfg.training_params.criterion_params.box_loss_gain *= num_branches_norm
        cfg.training_params.criterion_params.cls_loss_gain *= num_classes_norm * num_branches_norm
        cfg.training_params.criterion_params.obj_loss_gain *= image_size_norm ** 2 * num_branches_norm
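        # E.g. (hypothetical values): with the standard 3 detection layers, the 80 COCO
        # classes and train_image_size=320, box_loss_gain and cls_loss_gain are left
        # unchanged (both norms equal 1.0), while obj_loss_gain shrinks by (320/640)**2 = 0.25.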

        log_msg += \
            f"""
            * box_loss_gain to {cfg.training_params.criterion_params.box_loss_gain};
            * cls_loss_gain to {cfg.training_params.criterion_params.cls_loss_gain};
            * obj_loss_gain to {cfg.training_params.criterion_params.obj_loss_gain};
            """

    logger.info(log_msg)
    return cfg


@hydra.main(config_path=pkg_resources.resource_filename("conf", ""), config_name="coco2017_yolov5_conf")
def train(cfg: DictConfig) -> None:
    # INSTANTIATE ALL OBJECTS IN CFG
    cfg = hydra.utils.instantiate(cfg)

    # CONNECT THE DATASET INTERFACE WITH DECI MODEL
    cfg.sg_model.connect_dataset_interface(cfg.dataset_interface, data_loader_num_workers=cfg.data_loader_num_workers)

    # BUILD NETWORK
    cfg.sg_model.build_model(cfg.architecture, load_checkpoint=cfg.load_checkpoint)

    cfg = scale_params(cfg)

    # TRAIN
    cfg.sg_model.train(training_params=cfg.training_params)


if __name__ == "__main__":
    super_gradients.init_trainer()
    train()
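
Since the entry point is wrapped in @hydra.main, any of the config values used above can be overridden from the command line with standard Hydra syntax (for example, dataset_params.batch_size=32 or training_params.max_epochs=300, assuming those keys exist in coco2017_yolov5_conf as the code suggests). The snippet below is a minimal, self-contained sketch, not part of the original script: it replays the LR/WD scaling arithmetic of scale_params() on hypothetical values, with a SimpleNamespace standing in for the real Hydra/SuperGradients config, so the numbers can be sanity-checked without GPUs or the full stack.

from types import SimpleNamespace

# Hypothetical values standing in for the Hydra config used by the script above
world_size = 4                      # pretend DDP over 4 devices
cfg = SimpleNamespace(
    initial_lr=0.01,
    warmup_bias_lr=0.1,
    weight_decay=0.0005,
    batch_size=4,                   # per-device batch size
    batch_accumulate=4,             # gradient accumulation steps
)

# DDP scaling: gradients are averaged across devices, so LR is multiplied
# by the world size and WD is divided by it (mirroring scale_params above)
cfg.initial_lr *= world_size
cfg.warmup_bias_lr *= world_size
cfg.weight_decay /= world_size

# WD scaling by a factor of [effective batch size] / 64
effective_batch_size = cfg.batch_size * world_size * cfg.batch_accumulate
cfg.weight_decay *= effective_batch_size / 64.

print(f"effective batch size: {effective_batch_size}")  # 4 * 4 * 4 = 64
print(f"initial_lr:           {cfg.initial_lr}")        # 0.01 * 4 = 0.04
print(f"weight_decay:         {cfg.weight_decay}")      # (0.0005 / 4) * (64 / 64) = 0.000125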