#634 Feature/sg 573 pose estimation

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:feature/SG-573-pose-estimation
23 changed files with 1135 additions and 6 deletions

1. src/super_gradients/common/factories/target_generator_factory.py (+7 / -0)
2. src/super_gradients/common/object_names.py (+10 / -0)
3. src/super_gradients/common/registry/registry.py (+2 / -0)
4. src/super_gradients/recipes/dataset_params/coco_pose_estimation_dataset_params.yaml (+86 / -0)
5. src/super_gradients/recipes/dataset_params/coco_pose_estimation_dekr_dataset_params.yaml (+21 / -0)
6. src/super_gradients/training/dataloaders/dataloaders.py (+23 / -0)
7. src/super_gradients/training/datasets/Dataset_Setup_Instructions.md (+36 / -0)
8. src/super_gradients/training/datasets/__init__.py (+2 / -1)
9. src/super_gradients/training/datasets/all_datasets.py (+2 / -0)
10. src/super_gradients/training/datasets/all_target_generators.py (+3 / -0)
11. src/super_gradients/training/datasets/pose_estimation_datasets/__init__.py (+4 / -0)
12. src/super_gradients/training/datasets/pose_estimation_datasets/base_keypoints.py (+115 / -0)
13. src/super_gradients/training/datasets/pose_estimation_datasets/coco_keypoints.py (+138 / -0)
14. src/super_gradients/training/datasets/pose_estimation_datasets/target_generators.py (+202 / -0)
15. src/super_gradients/training/transforms/all_transforms.py (+19 / -1)
16. src/super_gradients/training/transforms/keypoint_transforms.py (+302 / -0)
17. tests/deci_core_integration_test_suite_runner.py (+2 / -2)
18. tests/deci_core_unit_test_suite_runner.py (+2 / -0)
19. tests/integration_tests/__init__.py (+2 / -1)
20. tests/integration_tests/pose_estimation_dataset_test.py (+38 / -0)
21. tests/unit_tests/__init__.py (+2 / -1)
22. tests/unit_tests/pose_estimation_dataset_test.py (+29 / -0)
23. tests/unit_tests/transforms_test.py (+88 / -0)
```python
# src/super_gradients/common/factories/target_generator_factory.py
from super_gradients.common.factories.base_factory import BaseFactory
from super_gradients.training.datasets.all_target_generators import ALL_TARGET_GENERATORS


class TargetGeneratorsFactory(BaseFactory):
    def __init__(self):
        super().__init__(ALL_TARGET_GENERATORS)
```
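For context, this factory is what lets `target_generator` be specified by name in YAML recipes. A minimal sketch of resolving a config entry by hand, assuming BaseFactory's usual semantics in SuperGradients (a string resolves to the registered class, a one-key mapping to an instance built with the given kwargs):

```python
from super_gradients.common.factories.target_generator_factory import TargetGeneratorsFactory

factory = TargetGeneratorsFactory()
# A one-key mapping resolves to DEKRTargetsGenerator(**params); values mirror the DEKR recipe below.
target_generator = factory.get(
    {"DEKRTargetsGenerator": {"output_stride": 4, "sigma": 2, "center_sigma": 4, "bg_weight": 0.1, "offset_radius": 4}}
)
```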
@@ -97,6 +97,16 @@ class Transforms:
     RandomAutocontrast = "RandomAutocontrast"
     RandomEqualize = "RandomEqualize"
 
+    # Keypoints
+    KeypointsRandomAffineTransform = "KeypointsRandomAffineTransform"
+    KeypointsImageNormalize = "KeypointsImageNormalize"
+    KeypointsImageToTensor = "KeypointsImageToTensor"
+    KeypointTransform = "KeypointTransform"
+    KeypointsPadIfNeeded = "KeypointsPadIfNeeded"
+    KeypointsLongestMaxSize = "KeypointsLongestMaxSize"
+    KeypointsRandomVerticalFlip = "KeypointsRandomVerticalFlip"
+    KeypointsRandomHorizontalFlip = "KeypointsRandomHorizontalFlip"
+
 
 class Optimizers:
     """Static class holding all the supported optimizer names"""
@@ -12,6 +12,7 @@ from super_gradients.training.datasets.all_datasets import ALL_DATASETS
 from super_gradients.training.pre_launch_callbacks import ALL_PRE_LAUNCH_CALLBACKS
 from super_gradients.training.models.segmentation_models.unet.unet_encoder import BACKBONE_STAGES
 from super_gradients.training.models.segmentation_models.unet.unet_decoder import UP_FUSE_BLOCKS
+from super_gradients.training.datasets.all_target_generators import ALL_TARGET_GENERATORS


 def create_register_decorator(registry: Dict[str, Callable]) -> Callable:
@@ -57,3 +58,4 @@ register_dataset = create_register_decorator(registry=ALL_DATASETS)
 register_pre_launch_callback = create_register_decorator(registry=ALL_PRE_LAUNCH_CALLBACKS)
 register_unet_backbone_stage = create_register_decorator(registry=BACKBONE_STAGES)
 register_unet_up_block = create_register_decorator(registry=UP_FUSE_BLOCKS)
+register_target_generator = create_register_decorator(registry=ALL_TARGET_GENERATORS)
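With this hook in place, a user-defined generator can be registered and then referenced by name from a recipe. A minimal sketch, assuming `register_target_generator` follows the same convention as the other `register_*` helpers produced by `create_register_decorator` (an optional registered name defaulting to the class name):

```python
from super_gradients.common.registry.registry import register_target_generator


@register_target_generator()
class ConstantTargetsGenerator:
    """Hypothetical generator that encodes nothing; illustrates the registration hook only."""

    def __call__(self, image, joints, mask):
        return mask
```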
```yaml
# src/super_gradients/recipes/dataset_params/coco_pose_estimation_dataset_params.yaml
train_dataset_params:
  data_dir: /data/coco # root path to coco data
  images_dir: images/train2017
  json_file: annotations/person_keypoints_train2017.json
  include_empty_samples: False
  min_instance_area: 128

  transforms:
    - KeypointsLongestMaxSize:
        max_height: 640
        max_width: 640

    - KeypointsPadIfNeeded:
        min_height: 640
        min_width: 640
        image_pad_value: [ 127, 127, 127 ]
        mask_pad_value: 1

    - KeypointsRandomHorizontalFlip:
        # Note these indexes are COCO-specific. If you're using a different dataset, you'll need to change these accordingly.
        flip_index: [ 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 ]
        prob: 0.5

    - KeypointsRandomAffineTransform:
        max_rotation: 30
        min_scale: 0.75
        max_scale: 1.5
        max_translate: 0.2
        image_pad_value: [ 127, 127, 127 ]
        mask_pad_value: 1
        prob: 0.5

    - KeypointsImageToTensor

    - KeypointsImageNormalize:
        mean: [ 0.485, 0.456, 0.406 ]
        std: [ 0.229, 0.224, 0.225 ]

  target_generator: ???

val_dataset_params:
  data_dir: /data/coco/
  images_dir: images/val2017
  json_file: annotations/person_keypoints_val2017.json
  include_empty_samples: True
  min_instance_area: 128

  transforms:
    - KeypointsLongestMaxSize:
        max_height: 640
        max_width: 640

    - KeypointsPadIfNeeded:
        min_height: 640
        min_width: 640
        image_pad_value: [ 127, 127, 127 ]
        mask_pad_value: 1

    - KeypointsImageToTensor

    - KeypointsImageNormalize:
        mean: [ 0.485, 0.456, 0.406 ]
        std: [ 0.229, 0.224, 0.225 ]

  target_generator: ???

train_dataloader_params:
  shuffle: True
  batch_size: 8
  num_workers: 8
  drop_last: True
  worker_init_fn:
    _target_: super_gradients.training.utils.utils.load_func
    dotpath: super_gradients.training.datasets.datasets_utils.worker_init_reset_seed
  collate_fn:
    _target_: super_gradients.training.datasets.pose_estimation_datasets.KeypointsCollate

val_dataloader_params:
  batch_size: 24
  num_workers: 8
  drop_last: False
  collate_fn:
    _target_: super_gradients.training.datasets.pose_estimation_datasets.KeypointsCollate

_convert_: all
```
```yaml
# src/super_gradients/recipes/dataset_params/coco_pose_estimation_dekr_dataset_params.yaml
defaults:
  - coco_pose_estimation_dataset_params
  - _self_

train_dataset_params:
  target_generator:
    DEKRTargetsGenerator:
      output_stride: 4
      sigma: 2
      center_sigma: 4
      bg_weight: 0.1
      offset_radius: 4

val_dataset_params:
  target_generator:
    DEKRTargetsGenerator:
      output_stride: 4
      sigma: 2
      center_sigma: 4
      bg_weight: 0.1
      offset_radius: 4
```
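The base config above deliberately leaves `target_generator: ???` (a mandatory OmegaConf value), and this recipe fills it in for DEKR. A minimal sketch of composing the recipe outside a full training run, mirroring the integration test at the bottom of this PR:

```python
import pkg_resources
from hydra import initialize_config_dir, compose

recipes_dir = pkg_resources.resource_filename("super_gradients.recipes", "")
with initialize_config_dir(config_dir=recipes_dir, version_base="1.2"):
    cfg = compose(config_name="dataset_params/coco_pose_estimation_dekr_dataset_params")

# The mandatory ??? is now resolved to the DEKR generator config.
print(cfg.train_dataset_params.target_generator)
```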
@@ -39,6 +39,7 @@ from super_gradients.training.utils.distributed_training_utils import (
 from super_gradients.common.abstractions.abstract_logger import get_logger
 from super_gradients.training.utils.utils import override_default_params_without_nones
 from super_gradients.common.factories.datasets_factory import DatasetsFactory
+from super_gradients.training.datasets.pose_estimation_datasets import COCOKeypointsDataset

 logger = get_logger(__name__)

@@ -590,6 +591,26 @@ def pascal_voc_detection_val(dataset_params: Dict = None, dataloader_params: Dic
     )


+def coco2017_pose_train(dataset_params: Dict = None, dataloader_params: Dict = None):
+    return get_data_loader(
+        config_name="coco_pose_estimation_dataset_params",
+        dataset_cls=COCOKeypointsDataset,
+        train=True,
+        dataset_params=dataset_params,
+        dataloader_params=dataloader_params,
+    )
+
+
+def coco2017_pose_val(dataset_params: Dict = None, dataloader_params: Dict = None):
+    return get_data_loader(
+        config_name="coco_pose_estimation_dataset_params",
+        dataset_cls=COCOKeypointsDataset,
+        train=False,
+        dataset_params=dataset_params,
+        dataloader_params=dataloader_params,
+    )
+
+
 ALL_DATALOADERS = {
     "coco2017_train": coco2017_train,
     "coco2017_val": coco2017_val,
@@ -597,6 +618,8 @@ ALL_DATALOADERS = {
     "coco2017_val_yolox": coco2017_val_yolox,
     "coco2017_train_ssd_lite_mobilenet_v2": coco2017_train_ssd_lite_mobilenet_v2,
     "coco2017_val_ssd_lite_mobilenet_v2": coco2017_val_ssd_lite_mobilenet_v2,
+    "coco2017_pose_train": coco2017_pose_train,
+    "coco2017_pose_val": coco2017_pose_val,
     "imagenet_train": imagenet_train,
     "imagenet_val": imagenet_val,
     "imagenet_efficientnet_train": imagenet_efficientnet_train,
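Once registered in ALL_DATALOADERS, the loaders can be requested by name. A minimal sketch, assuming a local COCO copy at the data_dir configured in the recipe; note the base config leaves `target_generator` as `???`, so it has to be supplied via dataset_params:

```python
from super_gradients.training import dataloaders

# Values mirror the DEKR recipe; any registered target generator config would work here.
dekr_generator = {"DEKRTargetsGenerator": {"output_stride": 4, "sigma": 2, "center_sigma": 4, "bg_weight": 0.1, "offset_radius": 4}}

train_loader = dataloaders.get(
    "coco2017_pose_train",
    dataset_params={"target_generator": dekr_generator},
    dataloader_params={"batch_size": 4, "num_workers": 0},
)
images, targets, extras = next(iter(train_loader))  # extras["joints"] holds the per-sample keypoints
```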
@@ -384,3 +384,39 @@ valid_set = SuperviselyPersonsDataset(root_dir='.../supervisely-persons', list_f

 NOTE: this dataset is only available for training. To test, please use PascalVOC2012SegmentationDataSet.
 </details>
+
+
+### Pose Estimation Datasets
+
+<details>
+<summary>COCO 2017</summary>
+
+1. Download the COCO dataset:
+    - annotations: http://images.cocodataset.org/annotations/annotations_trainval2017.zip
+    - train2017: http://images.cocodataset.org/zips/train2017.zip
+    - val2017: http://images.cocodataset.org/zips/val2017.zip
+
+2. Unzip and organize it as below:
+```
+    coco
+    ├── annotations
+    │      ├─ person_keypoints_train2017.json
+    │      ├─ person_keypoints_val2017.json
+    │      └─ ...
+    └── images
+        ├── train2017
+        │   ├─ 000000000001.jpg
+        │   └─ ...
+        └── val2017
+            └─ ...
+```
+
+3. Instantiate the dataset (note the keypoints annotation files, not the detection `instances_*.json` files):
+```python
+from super_gradients.training.datasets import COCOKeypointsDataset
+train_set = COCOKeypointsDataset(data_dir='.../coco', images_dir='images/train2017', json_file='annotations/person_keypoints_train2017.json', ...)
+valid_set = COCOKeypointsDataset(data_dir='.../coco', images_dir='images/val2017', json_file='annotations/person_keypoints_val2017.json', ...)
+```
+</details>
@@ -14,7 +14,7 @@ from super_gradients.training.datasets.segmentation_datasets.pascal_voc_segmenta
 from super_gradients.training.datasets.segmentation_datasets.cityscape_segmentation import CityscapesDataset
 from super_gradients.training.datasets.segmentation_datasets.coco_segmentation import CoCoSegmentationDataSet
 from super_gradients.training.datasets.segmentation_datasets.supervisely_persons_segmentation import SuperviselyPersonsDataset
-
+from super_gradients.training.datasets.pose_estimation_datasets import COCOKeypointsDataset

 cv2.setNumThreads(0)

@@ -36,4 +36,5 @@ __all__ = [
     "Cifar10",
     "Cifar100",
     "SuperviselyPersonsDataset",
+    "COCOKeypointsDataset",
 ]
@@ -9,6 +9,7 @@ from super_gradients.training.datasets.segmentation_datasets import (
     SuperviselyPersonsDataset,
     PascalVOCAndAUGUnifiedDataset,
 )
+from super_gradients.training.datasets.pose_estimation_datasets import COCOKeypointsDataset

 ALL_DATASETS = {
     "Cifar10": Cifar10,
@@ -24,4 +25,5 @@ ALL_DATASETS = {
     "CityscapesDataset": CityscapesDataset,
     "SuperviselyPersonsDataset": SuperviselyPersonsDataset,
     "PascalVOCAndAUGUnifiedDataset": PascalVOCAndAUGUnifiedDataset,
+    "COCOKeypointsDataset": COCOKeypointsDataset,
 }
```python
# src/super_gradients/training/datasets/all_target_generators.py
from super_gradients.training.datasets.pose_estimation_datasets.target_generators import DEKRTargetsGenerator

ALL_TARGET_GENERATORS = {"DEKRTargetsGenerator": DEKRTargetsGenerator}
```
```python
# src/super_gradients/training/datasets/pose_estimation_datasets/__init__.py
from super_gradients.training.datasets.pose_estimation_datasets.coco_keypoints import COCOKeypointsDataset
from super_gradients.training.datasets.pose_estimation_datasets.base_keypoints import BaseKeypointsDataset, KeypointsCollate

__all__ = ["COCOKeypointsDataset", "BaseKeypointsDataset", "KeypointsCollate"]
```
```python
# src/super_gradients/training/datasets/pose_estimation_datasets/base_keypoints.py
import abc
from typing import Tuple, List, Mapping, Any, Dict, Callable

import numpy as np
import torch
from torch.utils.data import default_collate, Dataset

from super_gradients.common.abstractions.abstract_logger import get_logger
from super_gradients.training.transforms.keypoint_transforms import KeypointsCompose, KeypointTransform

logger = get_logger(__name__)


class BaseKeypointsDataset(Dataset):
    """
    Base class for pose estimation datasets.
    Descendants should implement the load_sample method to read a sample from disk and return an (image, mask, joints, extras) tuple.
    """

    def __init__(
        self,
        target_generator: Callable,
        transforms: List[KeypointTransform],
        min_instance_area: float,
    ):
        """
        :param target_generator: Target generator that will be used to generate the targets for the model.
                                 See DEKRTargetsGenerator for an example.
        :param transforms: Transforms to be applied to the image & keypoints
        :param min_instance_area: Minimum area of an instance to be included in the dataset
        """
        super().__init__()
        self.target_generator = target_generator
        self.transforms = KeypointsCompose(transforms)
        self.min_instance_area = min_instance_area

    @abc.abstractmethod
    def __len__(self) -> int:
        raise NotImplementedError()

    @abc.abstractmethod
    def load_sample(self, index) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
        """
        Read a sample from the disk and return an (image, mask, joints, extras) tuple
        :param index: Sample index
        :return: Tuple of (image, mask, joints, extras)
                 image - Numpy array of [H,W,3] shape, which represents the input RGB image
                 mask - Numpy array of [H,W] shape, where zero values mark an ignored region that should not contribute to the loss
                 joints - Numpy array of [Num Instances, Num Joints, 3] shape, which represents the skeletons of the instances
                 extras - Dictionary of extra information about the sample that should be included in the extras output
        """
        raise NotImplementedError()

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, Any, Mapping[str, Any]]:
        img, mask, joints, extras = self.load_sample(index)
        img, mask, joints = self.transforms(img, mask, joints)
        joints = self.filter_joints(joints, img)
        targets = self.target_generator(img, joints, mask)
        return img, targets, {"joints": joints, **extras}

    def compute_area(self, joints: np.ndarray) -> np.ndarray:
        """
        Compute the area of a bounding box for each instance.
        :param joints: [Num Instances, Num Joints, 3]
        :return: [Num Instances]
        """
        w = np.max(joints[:, :, 0], axis=-1) - np.min(joints[:, :, 0], axis=-1)
        h = np.max(joints[:, :, 1], axis=-1) - np.min(joints[:, :, 1], axis=-1)
        return w * h

    def filter_joints(self, joints: np.ndarray, image: np.ndarray) -> np.ndarray:
        """
        Filter instances that are either too small or have no visible keypoints.
        :param joints: Array of shape [Num Instances, Num Joints, 3]
        :param image: Image the joints belong to; only its shape is used
        :return: [New Num Instances, Num Joints, 3], where New Num Instances <= Num Instances
        """
        # Mark joints that fall outside the image as invisible
        outside_image_mask = (joints[:, :, 0] < 0) | (joints[:, :, 1] < 0) | (joints[:, :, 0] >= image.shape[1]) | (joints[:, :, 1] >= image.shape[0])
        joints[outside_image_mask, 2] = 0

        # Filter instances with all invisible keypoints
        instances_with_visible_joints = np.count_nonzero(joints[:, :, 2], axis=-1) > 0
        joints = joints[instances_with_visible_joints]

        # Remove instances whose area is too small
        areas = self.compute_area(joints)
        joints = joints[areas > self.min_instance_area]
        return joints


class KeypointsCollate:
    """
    Collate image & targets, return extras as-is.
    """

    def __call__(self, batch):
        images = []
        targets = []
        extras = []
        for image, target, extra in batch:
            images.append(image)
            targets.append(target)
            extras.append(extra)
        extras = {k: [dic[k] for dic in extras] for k in extras[0]}  # Convert list of dicts to dict of lists
        images = default_collate(images)
        targets = default_collate(targets)
        return images, targets, extras
```
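To support a new pose dataset, only `load_sample` and `__len__` need to be implemented; transforms, instance filtering, and target generation come from the base class. A minimal hypothetical sketch (the in-memory sample list is illustrative, not part of this PR):

```python
import numpy as np

from super_gradients.training.datasets.pose_estimation_datasets import BaseKeypointsDataset


class InMemoryKeypointsDataset(BaseKeypointsDataset):
    """Hypothetical dataset over pre-loaded (image, joints) pairs."""

    def __init__(self, samples, target_generator, transforms, min_instance_area=0.0):
        super().__init__(target_generator=target_generator, transforms=transforms, min_instance_area=min_instance_area)
        self.samples = samples  # list of (image [H,W,3] uint8, joints [Instances, Joints, 3]) pairs

    def __len__(self) -> int:
        return len(self.samples)

    def load_sample(self, index):
        image, joints = self.samples[index]
        mask = np.ones(image.shape[:2], dtype=np.float32)  # nothing to ignore
        return image, mask, joints, {}
```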
```python
# src/super_gradients/training/datasets/pose_estimation_datasets/coco_keypoints.py
import os
from typing import Tuple, List, Mapping, Any, Dict

import cv2
import numpy as np
import pycocotools
from pycocotools.coco import COCO

from super_gradients.common.abstractions.abstract_logger import get_logger
from super_gradients.common.decorators.factory_decorator import resolve_param
from super_gradients.common.factories.target_generator_factory import TargetGeneratorsFactory
from super_gradients.common.factories.transforms_factory import TransformsFactory
from super_gradients.training.datasets.pose_estimation_datasets.base_keypoints import BaseKeypointsDataset
from super_gradients.training.transforms.keypoint_transforms import KeypointTransform

logger = get_logger(__name__)


class COCOKeypointsDataset(BaseKeypointsDataset):
    """
    Dataset class for training pose estimation models on the COCO Keypoints dataset.
    Users should pass a target generator class that is model-specific and generates the targets for the model.
    """

    @resolve_param("transforms", TransformsFactory())
    @resolve_param("target_generator", TargetGeneratorsFactory())
    def __init__(
        self,
        data_dir: str,
        images_dir: str,
        json_file: str,
        include_empty_samples: bool,
        target_generator,
        transforms: List[KeypointTransform],
        min_instance_area: float,
    ):
        """
        :param data_dir: Root directory of the COCO dataset
        :param images_dir: Path suffix to the images directory inside the data_dir
        :param json_file: Path suffix to the json file inside the data_dir
        :param include_empty_samples: If True, images without any annotations will be included in the dataset.
                                      Otherwise, they will be filtered out.
        :param target_generator: Target generator that will be used to generate the targets for the model.
                                 See DEKRTargetsGenerator for an example.
        :param transforms: Transforms to be applied to the image & keypoints
        :param min_instance_area: Minimum area of an instance to be included in the dataset
        """
        super().__init__(transforms=transforms, target_generator=target_generator, min_instance_area=min_instance_area)
        self.root = data_dir
        self.images_dir = os.path.join(data_dir, images_dir)
        self.json_file = os.path.join(data_dir, json_file)

        coco = COCO(self.json_file)
        if len(coco.dataset["categories"]) != 1:
            raise ValueError("Dataset must contain exactly one category")

        self.coco = coco
        self.ids = list(self.coco.imgs.keys())
        self.joints = coco.dataset["categories"][0]["keypoints"]
        self.num_joints = len(self.joints)

        if not include_empty_samples:
            subset = [img_id for img_id in self.ids if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0]
            self.ids = subset

    def __len__(self):
        return len(self.ids)

    def load_sample(self, index) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
        img_id = self.ids[index]
        image_info = self.coco.loadImgs(img_id)[0]
        file_name = image_info["file_name"]
        file_path = os.path.join(self.images_dir, file_name)

        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anno = self.coco.loadAnns(ann_ids)
        anno = [obj for obj in anno if bool(obj["iscrowd"]) is False and obj["num_keypoints"] > 0]

        orig_image = cv2.imread(file_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if orig_image.shape[0] != image_info["height"] or orig_image.shape[1] != image_info["width"]:
            raise RuntimeError(f"Annotated image size ({image_info['height'], image_info['width']}) does not match image size in file {orig_image.shape[:2]}")

        joints: np.ndarray = self.get_joints(anno)
        mask: np.ndarray = self.get_mask(anno, image_info)
        extras = dict(file_name=image_info["file_name"])
        return orig_image, mask, joints, extras

    def get_joints(self, anno: List[Mapping[str, Any]]) -> np.ndarray:
        """
        Decode the keypoints from the COCO annotation and return them as an array of shape [Num Instances, Num Joints, 3].
        The visibility of keypoints is encoded in the third channel:
            - 0 - invisible (outside image)
            - 1 - present in image but occluded
            - 2 - fully visible
        :param anno: List of COCO annotation dicts
        :return: [Num Instances, Num Joints, 3], where the last channel represents (x, y, visibility)
        """
        joints = []
        for i, obj in enumerate(anno):
            keypoints = np.array(obj["keypoints"]).reshape([-1, 3])
            joints.append(keypoints)

        num_instances = len(joints)
        joints = np.array(joints, dtype=np.float32).reshape((num_instances, self.num_joints, 3))
        return joints

    def get_mask(self, anno, img_info) -> np.ndarray:
        """
        Compute the ignore mask, which marks crowd objects / objects without keypoints so their regions are excluded from the loss.
        :param anno: List of COCO annotation dicts
        :param img_info: COCO image info dict with "height" and "width" keys
        :return: Float mask of [H,W] shape (same as image dimensions),
                 where 1.0 marks pixels that should contribute to the loss, and 0.0 marks areas that should be excluded.
        """
        m = np.zeros((img_info["height"], img_info["width"]), dtype=np.float32)
        for obj in anno:
            if obj["iscrowd"]:
                rle = pycocotools.mask.frPyObjects(obj["segmentation"], img_info["height"], img_info["width"])
                mask = pycocotools.mask.decode(rle)
                if mask.shape != m.shape:
                    logger.warning(f"Mask shape {mask.shape} does not match image shape {m.shape} for image {img_info['file_name']}")
                    continue
                m += mask
            elif obj["num_keypoints"] == 0:
                rles = pycocotools.mask.frPyObjects(obj["segmentation"], img_info["height"], img_info["width"])
                for rle in rles:
                    mask = pycocotools.mask.decode(rle)
                    if mask.shape != m.shape:
                        logger.warning(f"Mask shape {mask.shape} does not match image shape {m.shape} for image {img_info['file_name']}")
                        continue
                    m += mask

        return (m < 0.5).astype(np.float32)
```
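For orientation, fetching a single sample end to end looks roughly like this. A sketch assuming a local COCO copy laid out as in the setup instructions above; the transforms mirror the validation recipe (minus normalization):

```python
from super_gradients.training.datasets import COCOKeypointsDataset
from super_gradients.training.datasets.pose_estimation_datasets.target_generators import DEKRTargetsGenerator
from super_gradients.training.transforms.keypoint_transforms import (
    KeypointsLongestMaxSize,
    KeypointsPadIfNeeded,
    KeypointsImageToTensor,
)

dataset = COCOKeypointsDataset(
    data_dir="/data/coco",
    images_dir="images/val2017",
    json_file="annotations/person_keypoints_val2017.json",
    include_empty_samples=False,
    target_generator=DEKRTargetsGenerator(output_stride=4, sigma=2, center_sigma=4, bg_weight=0.1, offset_radius=4),
    transforms=[
        KeypointsLongestMaxSize(max_height=640, max_width=640),
        KeypointsPadIfNeeded(min_height=640, min_width=640, image_pad_value=[127, 127, 127], mask_pad_value=1),
        KeypointsImageToTensor(),
    ],
    min_instance_area=128,
)

image, (heatmaps, mask, offset_map, offset_weight), extras = dataset[0]
print(image.shape, heatmaps.shape)  # torch.Size([3, 640, 640]) (18, 160, 160)
```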
```python
# src/super_gradients/training/datasets/pose_estimation_datasets/target_generators.py
from typing import Tuple

import cv2
import numpy as np
from torch import Tensor


class DEKRTargetsGenerator:
    """
    Target generator for the pose estimation task, tailored for the DEKR paper (https://arxiv.org/abs/2104.02300)
    """

    def __init__(self, output_stride: int, sigma: float, center_sigma: float, bg_weight: float, offset_radius: float):
        """
        :param output_stride: Downsampling factor for target maps (w.r.t. the input image resolution)
        :param sigma: Sigma of the gaussian kernel used to generate the keypoint heatmap (effective radius is 3*sigma)
        :param center_sigma: Sigma of the gaussian kernel used to generate the instance "center" heatmap (effective radius is 3*sigma)
        :param bg_weight: Weight assigned to all background pixels (used to re-weight the heatmap loss)
        :param offset_radius: Radius for the offset encoding (in pixels)
        """
        self.output_stride = output_stride
        self.sigma = sigma
        self.center_sigma = center_sigma
        self.bg_weight = bg_weight
        self.offset_radius = offset_radius

    def get_heat_val(self, sigma: float, x, y, x0, y0) -> float:
        g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma**2))
        return g

    def compute_area(self, joints: np.ndarray) -> np.ndarray:
        """
        Compute the area of a bounding box for each instance
        :param joints: [Num Instances, Num Joints, 3]
        :return: [Num Instances]
        """
        w = np.max(joints[:, :, 0], axis=-1) - np.min(joints[:, :, 0], axis=-1)
        h = np.max(joints[:, :, 1], axis=-1) - np.min(joints[:, :, 1], axis=-1)
        return w * h

    def sort_joints_by_area(self, joints: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Rearrange joints in descending order of the area of the bounding box around them
        """
        area = self.compute_area(joints)
        order = np.argsort(-area)
        joints = joints[order]
        area = area[order]
        return joints, area

    def augment_with_center_joint(self, joints: np.ndarray) -> np.ndarray:
        """
        Augment the set of joints with an additional center joint.
        Returns a new array of shape [Instances, Joints+1, 3] where the last joint is the center joint.
        Only instances with at least one visible joint are returned.
        :param joints: [Num Instances, Num Joints, 3]. The last channel represents (x, y, visibility)
        :return: [Num Instances, Num Joints + 1, 3]
        """
        augmented_joints = []
        num_joints = joints.shape[1]
        num_joints_with_center = num_joints + 1

        for keypoints in joints:
            # Compute a center point for each person
            visible_keypoints = keypoints[:, 2] > 0
            joints_sum = np.sum(keypoints[:, :2] * np.expand_dims(visible_keypoints, -1), axis=0)
            num_vis_joints = np.count_nonzero(visible_keypoints)
            if num_vis_joints == 0:
                raise ValueError("No visible joints found in instance. ")

            keypoints_with_center = np.zeros((num_joints_with_center, 3))
            keypoints_with_center[0:num_joints] = keypoints
            keypoints_with_center[-1, :2] = joints_sum / num_vis_joints
            keypoints_with_center[-1, 2] = 1

            augmented_joints.append(keypoints_with_center)

        joints = np.array(augmented_joints, dtype=np.float32).reshape((-1, num_joints_with_center, 3))
        return joints

    def __call__(self, image: Tensor, joints: np.ndarray, mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Encode the keypoints into dense targets that participate in the loss computation.
        :param image: Image tensor [3, H, W]
        :param joints: [Instances, NumJoints, 3]
        :param mask: [H,W] A mask that indicates which pixels should be included in (1) or excluded from (0) the loss computation.
        :return: Tuple of (heatmap, mask, offset, offset_weight)
                 heatmap - [NumJoints+1, H // Output Stride, W // Output Stride]
                 mask - [NumJoints+1, H // Output Stride, W // Output Stride]
                 offset - [NumJoints*2, H // Output Stride, W // Output Stride]
                 offset_weight - [NumJoints*2, H // Output Stride, W // Output Stride]
        """
        if image.shape[1:3] != mask.shape[:2]:
            raise ValueError(f"Image and mask should have the same shape {image.shape[1:3]} != {mask.shape[:2]}")

        if image.shape[1] % self.output_stride != 0 or image.shape[2] % self.output_stride != 0:
            raise ValueError("Image shape should be divisible by output stride")

        num_instances, num_joints, _ = joints.shape
        num_joints_with_center = num_joints + 1

        joints, area = self.sort_joints_by_area(joints)
        joints = self.augment_with_center_joint(joints)

        # Compute the size of the target maps
        rows, cols = mask.shape
        output_rows, output_cols = rows // self.output_stride, cols // self.output_stride

        heatmaps = np.zeros(
            shape=(num_joints_with_center, output_rows, output_cols),
            dtype=np.float32,
        )

        ignored_hms = 2 * np.ones(
            shape=(num_joints_with_center, output_rows, output_cols),
            dtype=np.float32,
        )  # Start with 2 in all places

        offset_map = np.zeros(
            (num_joints * 2, output_rows, output_cols),
            dtype=np.float32,
        )
        offset_weight = np.zeros(
            (num_joints * 2, output_rows, output_cols),
            dtype=np.float32,
        )

        sx = output_cols / cols
        sy = output_rows / rows

        joints = joints.copy()
        joints[:, :, 0] *= sx
        joints[:, :, 1] *= sy

        for person_id, p in enumerate(joints):
            for idx, pt in enumerate(p):
                if idx < num_joints:  # Last joint index is the object center
                    sigma = self.sigma
                else:
                    sigma = self.center_sigma

                if pt[2] > 0:
                    x, y = pt[0], pt[1]
                    if x < 0 or y < 0 or x >= output_cols or y >= output_rows:
                        continue

                    ul = int(np.floor(x - 3 * sigma - 1)), int(np.floor(y - 3 * sigma - 1))
                    br = int(np.ceil(x + 3 * sigma + 1)), int(np.ceil(y + 3 * sigma + 1))

                    aa, bb = max(0, ul[1]), min(br[1], output_rows)
                    cc, dd = max(0, ul[0]), min(br[0], output_cols)

                    joint_rg = np.zeros((bb - aa, dd - cc), dtype=np.float32)
                    for sy in range(aa, bb):
                        for sx in range(cc, dd):
                            # EK: Note we round x/y values here to obtain a clear peak in the center of an odd-sized heatmap
                            # joint_rg[sy - aa, sx - cc] = self.get_heat_val(sigma, sx, sy, x, y)
                            joint_rg[sy - aa, sx - cc] = self.get_heat_val(sigma, sx, sy, int(x), int(y))

                    # It is important for the RFL loss to have exactly 1.0 in the heatmap, since 0.9999 would be interpreted as a negative pixel
                    joint_rg[joint_rg.shape[0] // 2, joint_rg.shape[1] // 2] = 1

                    heatmaps[idx, aa:bb, cc:dd] = np.maximum(heatmaps[idx, aa:bb, cc:dd], joint_rg)
                    ignored_hms[idx, aa:bb, cc:dd] = 1.0

        for person_id, p in enumerate(joints):
            ct_x = int(p[-1, 0])
            ct_y = int(p[-1, 1])
            ct_v = int(p[-1, 2])
            if ct_v < 1 or ct_x < 0 or ct_y < 0 or ct_x >= output_cols or ct_y >= output_rows:
                continue

            for idx, pt in enumerate(p[:-1]):
                if pt[2] > 0:
                    x, y = pt[0], pt[1]
                    if x < 0 or y < 0 or x >= output_cols or y >= output_rows:
                        continue

                    start_x = max(int(ct_x - self.offset_radius), 0)
                    start_y = max(int(ct_y - self.offset_radius), 0)
                    end_x = min(int(ct_x + self.offset_radius), output_cols)
                    end_y = min(int(ct_y + self.offset_radius), output_rows)

                    for pos_x in range(start_x, end_x):
                        for pos_y in range(start_y, end_y):
                            offset_x = pos_x - x
                            offset_y = pos_y - y

                            offset_map[idx * 2, pos_y, pos_x] = offset_x
                            offset_map[idx * 2 + 1, pos_y, pos_x] = offset_y
                            offset_weight[idx * 2, pos_y, pos_x] = 1.0 / np.sqrt(area[person_id])
                            offset_weight[idx * 2 + 1, pos_y, pos_x] = 1.0 / np.sqrt(area[person_id])

        ignored_hms[ignored_hms == 2] = self.bg_weight

        mask = cv2.resize(mask, dsize=(output_cols, output_rows), interpolation=cv2.INTER_LINEAR)
        mask = (mask > 0).astype(np.float32)
        mask = mask * ignored_hms

        return heatmaps, mask, offset_map, offset_weight
```
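As a sanity check of the encoding, the generator can be called directly on synthetic data; the shapes follow the docstring above. A minimal sketch:

```python
import numpy as np
import torch

from super_gradients.training.datasets.pose_estimation_datasets.target_generators import DEKRTargetsGenerator

generator = DEKRTargetsGenerator(output_stride=4, sigma=2, center_sigma=4, bg_weight=0.1, offset_radius=4)

image = torch.zeros((3, 256, 256))  # [3, H, W]; H and W must be divisible by output_stride
mask = np.ones((256, 256), dtype=np.float32)  # nothing ignored
joints = np.random.uniform(64, 192, size=(2, 17, 3)).astype(np.float32)  # two synthetic instances
joints[..., 2] = 2  # mark all keypoints as fully visible

heatmaps, mask_out, offsets, offset_weights = generator(image, joints, mask)
print(heatmaps.shape, mask_out.shape, offsets.shape, offset_weights.shape)
# (18, 64, 64) (18, 64, 64) (34, 64, 64) (34, 64, 64)
```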
@@ -66,7 +66,16 @@ from torchvision.transforms import (
     RandomAutocontrast,
     RandomEqualize,
 )
-
+from super_gradients.training.transforms.keypoint_transforms import (
+    KeypointsRandomAffineTransform,
+    KeypointsImageNormalize,
+    KeypointsImageToTensor,
+    KeypointTransform,
+    KeypointsPadIfNeeded,
+    KeypointsLongestMaxSize,
+    KeypointsRandomVerticalFlip,
+    KeypointsRandomHorizontalFlip,
+)

 TRANSFORMS = {
     Transforms.SegRandomFlip: SegRandomFlip,
@@ -129,6 +138,15 @@ TRANSFORMS = {
     Transforms.RandomAutocontrast: RandomAutocontrast,
     Transforms.RandomEqualize: RandomEqualize,
     Transforms.Standardize: Standardize,
+    # Keypoints
+    Transforms.KeypointsRandomAffineTransform: KeypointsRandomAffineTransform,
+    Transforms.KeypointsImageNormalize: KeypointsImageNormalize,
+    Transforms.KeypointsImageToTensor: KeypointsImageToTensor,
+    Transforms.KeypointTransform: KeypointTransform,
+    Transforms.KeypointsPadIfNeeded: KeypointsPadIfNeeded,
+    Transforms.KeypointsLongestMaxSize: KeypointsLongestMaxSize,
+    Transforms.KeypointsRandomVerticalFlip: KeypointsRandomVerticalFlip,
+    Transforms.KeypointsRandomHorizontalFlip: KeypointsRandomHorizontalFlip,
 }
 logger = get_logger(__name__)
```python
# src/super_gradients/training/transforms/keypoint_transforms.py
import random
from abc import abstractmethod
from typing import Tuple, List, Iterable, Union

import cv2
import numpy as np
from torch import Tensor
from torchvision.transforms import functional as F

__all__ = [
    "KeypointsImageNormalize",
    "KeypointsImageToTensor",
    "KeypointsPadIfNeeded",
    "KeypointsLongestMaxSize",
    "KeypointTransform",
    "KeypointsCompose",
    "KeypointsRandomHorizontalFlip",
    "KeypointsRandomAffineTransform",
    "KeypointsRandomVerticalFlip",
]


class KeypointTransform(object):
    """
    Base class for all transforms for keypoints augmentation.
    All transforms subclassing it should implement a __call__ method that takes image, mask and keypoints as input and
    returns the transformed image, mask and keypoints.
    """

    @abstractmethod
    def __call__(self, image: np.ndarray, mask: np.ndarray, joints: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Apply the transformation to image, mask and keypoints.
        :param image: Input image of [H,W,3] shape
        :param mask: Numpy array of [H,W] shape, where zero values mark an ignored region (not contributing to the loss)
        :param joints: Numpy array of [NumInstances, NumJoints, 3] shape. The last dimension contains (x, y, visibility) for each joint.
        :return: (image, mask, joints)
        """
        raise NotImplementedError


class KeypointsCompose(KeypointTransform):
    def __init__(self, transforms: List[KeypointTransform]):
        self.transforms = transforms

    def __call__(self, image: np.ndarray, mask: np.ndarray, joints: np.ndarray) -> Tuple[Union[np.ndarray, Tensor], np.ndarray, np.ndarray]:
        for t in self.transforms:
            image, mask, joints = t(image, mask, joints)
        return image, mask, joints


class KeypointsImageToTensor(KeypointTransform):
    """
    Convert the image from a numpy array to a tensor and permute the axes to [C,H,W].
    This transform also divides uint8 images by 255.0 to bring them to the [0,1] range.
    """

    def __call__(self, image: np.ndarray, mask: np.ndarray, joints: np.ndarray):
        return F.to_tensor(image), mask, joints


class KeypointsImageNormalize(KeypointTransform):
    """
    Normalize the image with mean and std. Note this transform should come after KeypointsImageToTensor,
    since it operates on a torch Tensor and not a numpy array.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, image: Tensor, mask: np.ndarray, joints: np.ndarray):
        image = F.normalize(image, mean=self.mean, std=self.std)
        return image, mask, joints


class KeypointsRandomHorizontalFlip(KeypointTransform):
    """
    Flip image, mask and joints horizontally with a given probability.
    """

    def __init__(self, flip_index: List[int], prob: float = 0.5):
        """
        :param flip_index: Indexes of keypoints on the flipped image. When doing a left-right flip, the left hand becomes the right hand,
                           so this array contains the order of keypoints on the flipped image. This is dataset-specific and depends on
                           how keypoints are defined in the dataset.
        :param prob: Probability of flipping
        """
        self.flip_index = flip_index
        self.prob = prob

    def __call__(self, image, mask, joints):
        if image.shape[:2] != mask.shape[:2]:
            raise RuntimeError(f"Image shape ({image.shape[:2]}) does not match mask shape ({mask.shape[:2]}).")

        if random.random() < self.prob:
            image = np.ascontiguousarray(np.fliplr(image))
            mask = np.ascontiguousarray(np.fliplr(mask))
            rows, cols = image.shape[:2]

            joints = joints.copy()
            joints = joints[:, self.flip_index]
            joints[:, :, 0] = cols - joints[:, :, 0] - 1

        return image, mask, joints


class KeypointsRandomVerticalFlip(KeypointTransform):
    """
    Flip image, mask and joints vertically with a given probability.
    """

    def __init__(self, prob: float = 0.5):
        self.prob = prob

    def __call__(self, image, mask, joints):
        if image.shape[:2] != mask.shape[:2]:
            raise RuntimeError(f"Image shape ({image.shape[:2]}) does not match mask shape ({mask.shape[:2]}).")

        if random.random() < self.prob:
            image = np.ascontiguousarray(np.flipud(image))
            mask = np.ascontiguousarray(np.flipud(mask))

            rows, cols = image.shape[:2]
            joints = joints.copy()
            joints[:, :, 1] = rows - joints[:, :, 1] - 1

        return image, mask, joints


class KeypointsLongestMaxSize(KeypointTransform):
    """
    Resize image, mask and joints to ensure that the resulting image does not exceed (max_height, max_width).
    """

    def __init__(self, max_height: int, max_width: int, interpolation: int = cv2.INTER_LINEAR, prob: float = 1.0):
        """
        :param max_height: Maximum image height after resizing
        :param max_width: Maximum image width after resizing
        :param interpolation: Interpolation method used for the image
        :param prob: Probability of applying this transform
        """
        self.max_height = max_height
        self.max_width = max_width
        self.interpolation = interpolation
        self.prob = prob

    def __call__(self, image, mask, joints: np.ndarray):
        if random.random() < self.prob:
            height, width = image.shape[:2]
            scale = min(self.max_height / height, self.max_width / width)

            image = self.rescale_image(image, scale, self.interpolation)
            if image.shape[0] != self.max_height and image.shape[1] != self.max_width:
                raise RuntimeError(f"Image shape is not as expected (scale={scale}, input_shape={height, width}, resized_shape={image.shape[:2]})")
            if image.shape[0] > self.max_height or image.shape[1] > self.max_width:
                raise RuntimeError(f"Image shape is not as expected (scale={scale}, input_shape={height, width}, resized_shape={image.shape[:2]})")

            mask = self.rescale_image(mask, scale, cv2.INTER_LINEAR)

            joints = joints.copy()
            joints[:, :, 0:2] = joints[:, :, 0:2] * scale

        return image, mask, joints

    @classmethod
    def rescale_image(cls, img, scale, interpolation):
        height, width = img.shape[:2]
        if scale != 1.0:
            new_height, new_width = tuple(int(dim * scale + 0.5) for dim in (height, width))
            img = cv2.resize(img, dsize=(new_width, new_height), interpolation=interpolation)
        return img


class KeypointsPadIfNeeded(KeypointTransform):
    """
    Pad image and mask to ensure that the resulting image size is not less than (min_height, min_width).
    Image and mask are padded from the right and bottom, thus the joints remain unchanged.
    """

    def __init__(self, min_height: int, min_width: int, image_pad_value: int, mask_pad_value: float):
        """
        :param min_height: Minimum image height
        :param min_width: Minimum image width
        :param image_pad_value: Padding value for the image
        :param mask_pad_value: Padding value for the mask
        """
        self.min_height = min_height
        self.min_width = min_width
        self.image_pad_value = tuple(image_pad_value) if isinstance(image_pad_value, Iterable) else int(image_pad_value)
        self.mask_pad_value = mask_pad_value

    def __call__(self, image, mask, joints):
        height, width = image.shape[:2]

        pad_bottom = max(0, self.min_height - height)
        pad_right = max(0, self.min_width - width)

        image = cv2.copyMakeBorder(image, top=0, bottom=pad_bottom, left=0, right=pad_right, value=self.image_pad_value, borderType=cv2.BORDER_CONSTANT)

        original_dtype = mask.dtype
        mask = cv2.copyMakeBorder(
            mask.astype(np.uint8), top=0, bottom=pad_bottom, left=0, right=pad_right, value=self.mask_pad_value, borderType=cv2.BORDER_CONSTANT
        )
        mask = mask.astype(original_dtype)

        return image, mask, joints


class KeypointsRandomAffineTransform(KeypointTransform):
    """
    Apply a random affine transform to image, mask and joints.
    """

    def __init__(
        self,
        max_rotation: float,
        min_scale: float,
        max_scale: float,
        max_translate: float,
        image_pad_value: int,
        mask_pad_value: float,
        prob: float = 0.5,
    ):
        """
        :param max_rotation: Max rotation angle in degrees
        :param min_scale: Lower bound for the scale change. For +/- 20% size jitter this should be 0.8
        :param max_scale: Upper bound for the scale change. For +/- 20% size jitter this should be 1.2
        :param max_translate: Max translation offset as a fraction of the image size
        :param image_pad_value: Padding value for the image
        :param mask_pad_value: Padding value for the mask
        :param prob: Probability of applying this transform
        """
        self.max_rotation = max_rotation
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.max_translate = max_translate
        self.image_pad_value = tuple(image_pad_value) if isinstance(image_pad_value, Iterable) else int(image_pad_value)
        self.mask_pad_value = mask_pad_value
        self.prob = prob

    def _get_affine_matrix(self, img, angle, scale, dx, dy):
        """
        Build a rotation/scale matrix around the (translated) image center.
        :param img: Image the matrix is computed for (only its shape is used)
        :param angle: Rotation angle in degrees
        :param scale: Isotropic scale factor
        :param dx: Horizontal translation as a fraction of the image width
        :param dy: Vertical translation as a fraction of the image height
        :return: Affine transformation matrix
        """
        height, width = img.shape[:2]
        center = (width / 2 + dx * width, height / 2 + dy * height)
        matrix = cv2.getRotationMatrix2D(center, angle, scale)
        return matrix

    def apply_to_keypoints(self, joints: np.ndarray, mat: np.ndarray):
        shape = joints.shape
        joints = joints.reshape(-1, 2)
        return np.dot(np.concatenate((joints, joints[:, 0:1] * 0 + 1), axis=1), mat.T).reshape(shape)

    def apply_to_image(self, image, mat, interpolation, padding_value, padding_mode=cv2.BORDER_CONSTANT):
        return cv2.warpAffine(
            image,
            mat,
            dsize=(image.shape[1], image.shape[0]),
            flags=interpolation,
            borderValue=padding_value,
            borderMode=padding_mode,
        )

    def __call__(self, image: np.ndarray, mask: np.ndarray, joints: np.ndarray):
        """
        :param image: Image of [H,W,3] shape
        :param mask: Mask of [H,W] shape
        :param joints: Joints of [Num Instances, Num Joints, 3] shape. Semantics of the last channel: (x, y, visibility)
        :return: (image, mask, joints)
        """
        if random.random() < self.prob:
            angle = random.uniform(-self.max_rotation, self.max_rotation)
            scale = random.uniform(self.min_scale, self.max_scale)
            dx = random.uniform(-self.max_translate, self.max_translate)
            dy = random.uniform(-self.max_translate, self.max_translate)

            mat_output = self._get_affine_matrix(image, angle, scale, dx, dy)
            mat_output = mat_output[:2]

            mask = self.apply_to_image(mask, mat_output, cv2.INTER_NEAREST, self.mask_pad_value, cv2.BORDER_CONSTANT)
            image = self.apply_to_image(image, mat_output, cv2.INTER_LINEAR, self.image_pad_value, cv2.BORDER_CONSTANT)

            joints = joints.copy()
            joints[:, :, 0:2] = self.apply_to_keypoints(joints[:, :, 0:2], mat_output)

            # Update the visibility status of joints that were moved outside the visible area
            joints_outside_image = (joints[:, :, 0] < 0) | (joints[:, :, 0] >= image.shape[1]) | (joints[:, :, 1] < 0) | (joints[:, :, 1] >= image.shape[0])
            joints[joints_outside_image, 2] = 0

        return image, mask, joints
```
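Taken together, the transforms chain exactly as in the recipe above. A minimal sketch on synthetic data:

```python
import numpy as np

from super_gradients.training.transforms.keypoint_transforms import (
    KeypointsCompose,
    KeypointsLongestMaxSize,
    KeypointsPadIfNeeded,
    KeypointsRandomHorizontalFlip,
    KeypointsImageToTensor,
)

# COCO-specific left/right swap order, as in the recipe above.
flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

transforms = KeypointsCompose(
    [
        KeypointsLongestMaxSize(max_height=640, max_width=640),
        KeypointsPadIfNeeded(min_height=640, min_width=640, image_pad_value=[127, 127, 127], mask_pad_value=1),
        KeypointsRandomHorizontalFlip(flip_index=flip_index, prob=0.5),
        KeypointsImageToTensor(),
    ]
)

image = np.random.randint(0, 255, size=(480, 640, 3), dtype=np.uint8)
mask = np.ones((480, 640), dtype=np.float32)
joints = np.random.uniform(0, 480, size=(2, 17, 3)).astype(np.float32)

image, mask, joints = transforms(image, mask, joints)
print(image.shape)  # torch.Size([3, 640, 640])
```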
@@ -1,7 +1,7 @@
 import sys
 import unittest

-from tests.integration_tests import TestDataset, EMAIntegrationTest, LRTest
+from tests.integration_tests import EMAIntegrationTest, LRTest, PoseEstimationDatasetIntegrationTest


 class CoreIntegrationTestSuiteRunner:
@@ -16,9 +16,9 @@ class CoreIntegrationTestSuiteRunner:
         _add_modules_to_integration_tests_suite - Adds unit tests to the Unit Tests Test Suite
             :return:
         """
-        self.integration_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestDataset))
         self.integration_tests_suite.addTest(self.test_loader.loadTestsFromModule(EMAIntegrationTest))
         self.integration_tests_suite.addTest(self.test_loader.loadTestsFromModule(LRTest))
+        self.integration_tests_suite.addTest(self.test_loader.loadTestsFromModule(PoseEstimationDatasetIntegrationTest))


 if __name__ == "__main__":
@@ -20,6 +20,7 @@ from tests.unit_tests import (
     ResumeTrainingTest,
     CallTrainAfterTestTest,
     CrashTipTest,
+    TestTransforms,
 )
 from tests.end_to_end_tests import TestTrainer
 from tests.unit_tests.detection_utils_test import TestDetectionUtils
@@ -121,6 +122,7 @@ class CoreUnitTestSuiteRunner:
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestModelsONNXExport))
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(MaxBatchesLoopBreakTest))
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestTrainingUtils))
+        self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestTransforms))

     def _add_modules_to_end_to_end_tests_suite(self):
         """
@@ -2,5 +2,6 @@

 from tests.integration_tests.ema_train_integration_test import EMAIntegrationTest
 from tests.integration_tests.lr_test import LRTest
+from tests.integration_tests.pose_estimation_dataset_test import PoseEstimationDatasetIntegrationTest

-__all__ = ["EMAIntegrationTest", "LRTest"]
+__all__ = ["EMAIntegrationTest", "LRTest", "PoseEstimationDatasetIntegrationTest"]
```python
# tests/integration_tests/pose_estimation_dataset_test.py
import os
import unittest

import pkg_resources
from hydra import initialize_config_dir, compose
from hydra.core.global_hydra import GlobalHydra

from super_gradients.common.environment.path_utils import normalize_path
from super_gradients.training.dataloaders.dataloaders import _process_dataset_params, get_data_loader
from super_gradients.training.datasets.pose_estimation_datasets import COCOKeypointsDataset


class PoseEstimationDatasetIntegrationTest(unittest.TestCase):
    def test_datasets_instantiation(self):
        GlobalHydra.instance().clear()
        sg_recipes_dir = pkg_resources.resource_filename("super_gradients.recipes", "")
        dataset_config = os.path.join("dataset_params", "coco_pose_estimation_dekr_dataset_params")
        with initialize_config_dir(config_dir=normalize_path(sg_recipes_dir), version_base="1.2"):
            # config is relative to a module
            cfg = compose(config_name=normalize_path(dataset_config))

            train_dataset_params = _process_dataset_params(cfg, dict(), True)
            val_dataset_params = _process_dataset_params(cfg, dict(), False)

            train_dataset = COCOKeypointsDataset(**train_dataset_params)
            assert train_dataset[0] is not None

            val_dataset = COCOKeypointsDataset(**val_dataset_params)
            assert val_dataset[0] is not None

    def test_dataloaders_instantiation(self):
        train_loader = get_data_loader("coco_pose_estimation_dekr_dataset_params", COCOKeypointsDataset, train=True, dataloader_params=dict(num_workers=0))
        val_loader = get_data_loader("coco_pose_estimation_dekr_dataset_params", COCOKeypointsDataset, train=False, dataloader_params=dict(num_workers=0))

        assert next(iter(train_loader)) is not None
        assert next(iter(val_loader)) is not None


if __name__ == "__main__":
    unittest.main()
```
@@ -22,7 +22,7 @@ from tests.unit_tests.conv_bn_relu_test import TestConvBnRelu
 from tests.unit_tests.initialize_with_dataloaders_test import InitializeWithDataloadersTest
 from tests.unit_tests.training_params_factory_test import TrainingParamsTest
 from tests.unit_tests.config_inspector_test import ConfigInspectTest
-
+from tests.unit_tests.transforms_test import TestTransforms

 __all__ = [
     "CrashTipTest",
@@ -48,4 +48,5 @@ __all__ = [
     "ResumeTrainingTest",
     "CallTrainAfterTestTest",
     "ConfigInspectTest",
+    "TestTransforms",
 ]
```python
# tests/unit_tests/pose_estimation_dataset_test.py
from super_gradients.training.datasets.pose_estimation_datasets.coco_keypoints import COCOKeypointsDataset
from super_gradients.training.datasets.pose_estimation_datasets.target_generators import DEKRTargetsGenerator
from super_gradients.training.transforms.keypoint_transforms import KeypointsRandomVerticalFlip


def test_dataset():
    target_generator = DEKRTargetsGenerator(
        output_stride=4,
        sigma=2,
        center_sigma=4,
        bg_weight=0.1,
        offset_radius=4,
    )

    dataset = COCOKeypointsDataset(
        data_dir="e:/coco2017",
        images_dir="images/train2017",
        json_file="annotations/person_keypoints_train2017.json",
        include_empty_samples=False,
        transforms=[KeypointsRandomVerticalFlip()],  # plain list; BaseKeypointsDataset wraps it in KeypointsCompose
        target_generator=target_generator,
        min_instance_area=0.0,  # required argument; keep all instances
    )

    assert dataset is not None
    assert dataset[0] is not None
```
```python
# tests/unit_tests/transforms_test.py
import unittest

import numpy as np

from super_gradients.training.transforms.keypoint_transforms import (
    KeypointsRandomHorizontalFlip,
    KeypointsRandomVerticalFlip,
    KeypointsRandomAffineTransform,
    KeypointsPadIfNeeded,
    KeypointsLongestMaxSize,
)


class TestTransforms(unittest.TestCase):
    def test_keypoints_random_affine(self):
        image = np.random.rand(640, 480, 3)
        mask = np.random.rand(640, 480)
        joints = np.random.randint(0, 480, size=(1, 17, 3))
        joints[..., 2] = 2  # all visible

        aug = KeypointsRandomAffineTransform(min_scale=0.8, max_scale=1.2, max_rotation=30, max_translate=0.5, prob=1, image_pad_value=0, mask_pad_value=0)
        aug_image, aug_mask, aug_joints = aug(image, mask, joints)

        joints_outside_image = (
            (aug_joints[:, :, 0] < 0) | (aug_joints[:, :, 1] < 0) | (aug_joints[:, :, 0] >= aug_image.shape[1]) | (aug_joints[:, :, 1] >= aug_image.shape[0])
        )

        # Ensure that keypoints outside the image are not visible
        self.assertTrue((aug_joints[joints_outside_image, 2] == 0).all())
        self.assertTrue((aug_joints[~joints_outside_image, 2] != 0).all())

    def test_keypoints_horizontal_flip(self):
        image = np.random.rand(640, 480, 3)
        mask = np.random.rand(640, 480)
        joints = np.random.randint(0, 100, size=(1, 17, 3))

        aug = KeypointsRandomHorizontalFlip(flip_index=[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], prob=1)
        aug_image, aug_mask, aug_joints = aug(image, mask, joints)

        np.testing.assert_array_equal(aug_image, image[:, ::-1, :])
        np.testing.assert_array_equal(aug_mask, mask[:, ::-1])
        np.testing.assert_array_equal(image.shape[1] - aug_joints[:, ::-1, 0] - 1, joints[..., 0])
        np.testing.assert_array_equal(aug_joints[:, ::-1, 1], joints[..., 1])
        np.testing.assert_array_equal(aug_joints[:, ::-1, 2], joints[..., 2])

    def test_keypoints_vertical_flip(self):
        image = np.random.rand(640, 480, 3)
        mask = np.random.rand(640, 480)
        joints = np.random.randint(0, 100, size=(1, 17, 3))

        aug = KeypointsRandomVerticalFlip(prob=1)
        aug_image, aug_mask, aug_joints = aug(image, mask, joints)

        np.testing.assert_array_equal(aug_image, image[::-1, :, :])
        np.testing.assert_array_equal(aug_mask, mask[::-1, :])
        np.testing.assert_array_equal(aug_joints[..., 0], joints[..., 0])
        np.testing.assert_array_equal(image.shape[0] - aug_joints[..., 1] - 1, joints[..., 1])
        np.testing.assert_array_equal(aug_joints[..., 2], joints[..., 2])

    def test_keypoints_pad_if_needed(self):
        image = np.random.rand(640, 480, 3)
        mask = np.random.rand(640, 480)
        joints = np.random.randint(0, 100, size=(1, 17, 3))

        aug = KeypointsPadIfNeeded(min_width=768, min_height=768, image_pad_value=0, mask_pad_value=0)
        aug_image, aug_mask, aug_joints = aug(image, mask, joints)

        self.assertEqual(aug_image.shape, (768, 768, 3))
        self.assertEqual(aug_mask.shape, (768, 768))
        np.testing.assert_array_equal(aug_joints, joints)

    def test_keypoints_longest_max_size(self):
        image = np.random.rand(640, 480, 3)
        mask = np.random.rand(640, 480)
        joints = np.random.randint(0, 480, size=(1, 17, 3))

        aug = KeypointsLongestMaxSize(max_height=512, max_width=512)
        aug_image, aug_mask, aug_joints = aug(image, mask, joints)

        self.assertEqual(aug_image.shape[:2], aug_mask.shape[:2])
        self.assertLessEqual(aug_image.shape[0], 512)
        self.assertLessEqual(aug_image.shape[1], 512)
        self.assertTrue((aug_joints[..., 0] < aug_image.shape[1]).all())
        self.assertTrue((aug_joints[..., 1] < aug_image.shape[0]).all())


if __name__ == "__main__":
    unittest.main()
```