#869 Add DagsHub Logger to Super Gradients

Merged
Ghost merged 1 commit into Deci-AI:master from timho102003:dagshub_logger
from typing import Dict, Optional, Union

import torch
from torchmetrics import Metric

import super_gradients
from super_gradients.common.object_names import Metrics
from super_gradients.common.registry.registry import register_metric
from super_gradients.training.utils import tensor_container_to_device
from super_gradients.training.utils.detection_utils import compute_detection_matching, compute_detection_metrics
from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback, IouThreshold
from super_gradients.common.abstractions.abstract_logger import get_logger

logger = get_logger(__name__)


@register_metric(Metrics.DETECTION_METRICS)
class DetectionMetrics(Metric):
    """
    DetectionMetrics

    Metric class for computing F1, Precision, Recall and Mean Average Precision.

    :param num_cls:                  Number of classes.
    :param post_prediction_callback: DetectionPostPredictionCallback to be applied on net's output prior to the metric computation (NMS).
    :param normalize_targets:        Whether to normalize bbox coordinates by image size.
    :param iou_thres:                IoU threshold to compute the mAP.
    :param recall_thres:             Recall threshold to compute the mAP.
    :param score_thres:              Score threshold to compute Recall, Precision and F1.
    :param top_k_predictions:        Number of predictions per class used to compute metrics, ordered by confidence score.
    :param dist_sync_on_step:        Synchronize metric state across processes at each ``forward()`` before returning the value at the step.
    :param accumulate_on_cpu:        Run on CPU regardless of device used in other parts.
                                     This is to avoid "CUDA out of memory" that might happen on GPU.
    """

    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback,
        normalize_targets: bool = False,
        iou_thres: Union[IouThreshold, float] = IouThreshold.MAP_05_TO_095,
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
    ):
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.num_cls = num_cls
        self.iou_thres = iou_thres

        if isinstance(iou_thres, IouThreshold):
            self.iou_thresholds = iou_thres.to_tensor()
        else:
            self.iou_thresholds = torch.tensor([iou_thres])

        self.map_str = "mAP" + self._get_range_str()
        self.greater_component_is_better = {
            f"Precision{self._get_range_str()}": True,
            f"Recall{self._get_range_str()}": True,
            f"mAP{self._get_range_str()}": True,
            f"F1{self._get_range_str()}": True,
        }
        self.component_names = list(self.greater_component_is_better.keys())
        self.components = len(self.component_names)

        self.post_prediction_callback = post_prediction_callback
        self.is_distributed = super_gradients.is_distributed()
        self.denormalize_targets = not normalize_targets
        self.world_size = None
        self.rank = None
        self.add_state(f"matching_info{self._get_range_str()}", default=[], dist_reduce_fx=None)

        self.recall_thresholds = torch.linspace(0, 1, 101) if recall_thres is None else recall_thres
        self.score_threshold = score_thres
        self.top_k_predictions = top_k_predictions

        self.accumulate_on_cpu = accumulate_on_cpu

    def update(self, preds, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
        """
        Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

        :param preds:         Raw output of the model, the format might change from one model to another,
                              but has to fit the input format of the post_prediction_callback (cx,cy,wh)
        :param target:        Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format: (index, label, cx, cy, w, h)
        :param device:        Device to run on
        :param inputs:        Input image tensor of shape (batch_size, n_img, height, width)
        :param crowd_targets: Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH
        """
        self.iou_thresholds = self.iou_thresholds.to(device)
        _, _, height, width = inputs.shape

        targets = target.clone()
        crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

        preds = self.post_prediction_callback(preds, device=device)

        new_matching_info = compute_detection_matching(
            preds,
            targets,
            height,
            width,
            self.iou_thresholds,
            crowd_targets=crowd_targets,
            top_k=self.top_k_predictions,
            denormalize_targets=self.denormalize_targets,
            device=self.device,
            return_on_cpu=self.accumulate_on_cpu,
        )

        accumulated_matching_info = getattr(self, f"matching_info{self._get_range_str()}")
        setattr(self, f"matching_info{self._get_range_str()}", accumulated_matching_info + new_matching_info)

    def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
        """Compute the metrics for all the accumulated results.

        :return: Metrics of interest
        """
        mean_ap, mean_precision, mean_recall, mean_f1 = -1.0, -1.0, -1.0, -1.0
        accumulated_matching_info = getattr(self, f"matching_info{self._get_range_str()}")

        if len(accumulated_matching_info):
            matching_info_tensors = [torch.cat(x, 0) for x in list(zip(*accumulated_matching_info))]

            # shape (n_class, nb_iou_thresh)
            ap, precision, recall, f1, unique_classes = compute_detection_metrics(
                *matching_info_tensors,
                recall_thresholds=self.recall_thresholds,
                score_threshold=self.score_threshold,
                device="cpu" if self.accumulate_on_cpu else self.device,
            )

            # Precision, recall and f1 are computed for IoU threshold range, averaged over classes
            # results before version 3.0.4 (Dec 11 2022) were computed only for smallest value (i.e IoU 0.5 if metric is @0.5:0.95)
            mean_precision, mean_recall, mean_f1 = precision.mean(), recall.mean(), f1.mean()

            # MaP is averaged over IoU thresholds and over classes
            mean_ap = ap.mean()

        return {
            f"Precision{self._get_range_str()}": mean_precision,
            f"Recall{self._get_range_str()}": mean_recall,
            f"mAP{self._get_range_str()}": mean_ap,
            f"F1{self._get_range_str()}": mean_f1,
        }

    def _sync_dist(self, dist_sync_fn=None, process_group=None):
        """
        When in distributed mode, stats are aggregated after each forward pass to the metric state. Since these have all
        different sizes we override the synchronization function since it works only for tensors (and use
        all_gather_object)

        :param dist_sync_fn:
        :return:
        """
        if self.world_size is None:
            self.world_size = torch.distributed.get_world_size() if self.is_distributed else -1
        if self.rank is None:
            self.rank = torch.distributed.get_rank() if self.is_distributed else -1

        if self.is_distributed:
            local_state_dict = {attr: getattr(self, attr) for attr in self._reductions.keys()}
            gathered_state_dicts = [None] * self.world_size
            torch.distributed.barrier()
            torch.distributed.all_gather_object(gathered_state_dicts, local_state_dict)
            matching_info = []
            for state_dict in gathered_state_dicts:
                matching_info += state_dict[f"matching_info{self._get_range_str()}"]
            matching_info = tensor_container_to_device(matching_info, device="cpu" if self.accumulate_on_cpu else self.device)
            setattr(self, f"matching_info{self._get_range_str()}", matching_info)

    def _get_range_str(self):
        return "@%.2f" % self.iou_thresholds[0] if not len(self.iou_thresholds) > 1 else "@%.2f:%.2f" % (self.iou_thresholds[0], self.iou_thresholds[-1])


@register_metric(Metrics.DETECTION_METRICS_050)
class DetectionMetrics_050(DetectionMetrics):
    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback = None,
        normalize_targets: bool = False,
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
    ):
        super().__init__(
            num_cls,
            post_prediction_callback,
            normalize_targets,
            IouThreshold.MAP_05,
            recall_thres,
            score_thres,
            top_k_predictions,
            dist_sync_on_step,
            accumulate_on_cpu,
        )


@register_metric(Metrics.DETECTION_METRICS_075)
class DetectionMetrics_075(DetectionMetrics):
    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback = None,
        normalize_targets: bool = False,
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
    ):
        super().__init__(
            num_cls, post_prediction_callback, normalize_targets, 0.75, recall_thres, score_thres, top_k_predictions, dist_sync_on_step, accumulate_on_cpu
        )


@register_metric(Metrics.DETECTION_METRICS_050_095)
class DetectionMetrics_050_095(DetectionMetrics):
    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback = None,
        normalize_targets: bool = False,
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
    ):
        super().__init__(
            num_cls,
            post_prediction_callback,
            normalize_targets,
            IouThreshold.MAP_05_TO_095,
            recall_thres,
            score_thres,
            top_k_predictions,
            dist_sync_on_step,
            accumulate_on_cpu,
        )
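
For reference, a minimal usage sketch of the metric defined above (not part of this diff). Here `model`, `val_loader`, and `my_post_prediction_callback` are placeholders for your own network, validation dataloader, and NMS callback, and the import path assumes this file ships as `super_gradients.training.metrics.detection_metrics`.

# Minimal validation-loop sketch for DetectionMetrics_050 (placeholders noted above).
import torch
from super_gradients.training.metrics.detection_metrics import DetectionMetrics_050

metric = DetectionMetrics_050(
    num_cls=80,                                             # number of dataset classes
    post_prediction_callback=my_post_prediction_callback,  # NMS callback matching the model's raw output format
    normalize_targets=True,
)

model.eval()
with torch.no_grad():
    for inputs, targets in val_loader:
        # targets are expected as (total_num_targets, 6) in LABEL_CXCYWH format: (index, label, cx, cy, w, h)
        preds = model(inputs)
        metric.update(preds, targets, device="cuda", inputs=inputs)

# Returns {"Precision@0.50": ..., "Recall@0.50": ..., "mAP@0.50": ..., "F1@0.50": ...}
print(metric.compute())

The DetectionMetrics_050, DetectionMetrics_075, and DetectionMetrics_050_095 subclasses only pin the IoU threshold (0.5, 0.75, or the 0.5:0.95 range); all accumulation, distributed synchronization, and computation logic is inherited from DetectionMetrics.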