import math
from typing import Optional

import torch
import torch.distributed as dist
from torch.utils.data import Sampler


# TODO: Add unit test for RepeatAugSampler once DDP unit tests are supported.
class RepeatAugSampler(Sampler):
    """
    Sampler that restricts data loading to a subset of the dataset for distributed training,
    with repeated augmentation.
    It ensures that different augmented versions of a sample are visible to different
    processes (GPUs). Heavily based on torch.utils.data.DistributedSampler.

    This sampler was taken from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py
    Copyright (c) 2015-present, Facebook, Inc.
    The code below is modified from:
    https://github.com/rwightman/pytorch-image-models/blame/master/timm/data/distributed_sampler.py

    Note that this sampler is currently supported only for DDP training.

    Arguments:
        dataset (torch.utils.data.Dataset): dataset to sample from.
        num_replicas (int, optional): number of dataset replicas (processes participating in
            distributed training); defaults to the world size when None (default=None).
        rank (int, optional): rank of the current process; defaults to the current distributed
            rank when None (default=None).
        shuffle (bool): whether to shuffle the dataset indices (default=True).
        num_repeats (int): number of repetitions of each example (default=3).
        selected_round (int): when > 0, the number of samples selected per epoch for each rank is
            int(math.floor(len(self.dataset) // selected_round * selected_round / selected_ratio))
            (default=256).
        selected_ratio (int): ratio by which to reduce the number of selected samples;
            defaults to num_replicas when 0 (default=0).
    """

    def __init__(
        self,
        dataset: torch.utils.data.Dataset,
        num_replicas: Optional[int] = None,
        rank: Optional[int] = None,
        shuffle: bool = True,
        num_repeats: int = 3,
        selected_round: int = 256,
        selected_ratio: int = 0,
    ):
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            rank = dist.get_rank()
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.shuffle = shuffle
        self.num_repeats = num_repeats
        self.epoch = 0
        self.num_samples = int(math.ceil(len(self.dataset) * num_repeats / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas
        # Determine the number of samples to select per epoch for each rank.
        # The num_selected logic defaults to match the original RASampler implementation,
        # but can be tweaked via the selected_ratio and selected_round args.
        selected_ratio = selected_ratio or num_replicas  # ratio to reduce selected samples by, num_replicas if 0
        if selected_round:
            self.num_selected_samples = int(math.floor(
                len(self.dataset) // selected_round * selected_round / selected_ratio))
        else:
            self.num_selected_samples = int(math.ceil(len(self.dataset) / selected_ratio))
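
    # Worked example (illustrative only, assuming ImageNet-1k with 1,281,167 training images,
    # 8 processes, num_repeats=3, selected_round=256 and selected_ratio=0, i.e. 8):
    #   num_samples          = ceil(1281167 * 3 / 8)      = 480438 indices per rank
    #   total_size           = 480438 * 8                 = 3843504 indices overall
    #   num_selected_samples = 1281167 // 256 * 256 / 8   = 160128 samples per rank per epoch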

    def __iter__(self):
        # deterministically shuffle based on epoch
        g = torch.Generator()
        g.manual_seed(self.epoch)
        if self.shuffle:
            indices = torch.randperm(len(self.dataset), generator=g)
        else:
            indices = torch.arange(start=0, end=len(self.dataset))

        # produce repeats e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2, ...]
        if isinstance(self.num_repeats, float) and not self.num_repeats.is_integer():
            # resample for repeats w/ non-integer ratio
            repeat_size = math.ceil(self.num_repeats * len(self.dataset))
            indices = indices[torch.tensor([int(i // self.num_repeats) for i in range(repeat_size)])]
        else:
            indices = torch.repeat_interleave(indices, repeats=int(self.num_repeats), dim=0)
        indices = indices.tolist()  # leaving as tensor thrashes dataloader memory

        # add extra samples to make it evenly divisible
        padding_size = self.total_size - len(indices)
        if padding_size > 0:
            indices += indices[:padding_size]
        assert len(indices) == self.total_size

        # subsample per rank
        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples

        # return up to num selected samples
        return iter(indices[:self.num_selected_samples])
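
    # Example of the per-rank split (illustrative only): with len(dataset)=4, num_repeats=3,
    # num_replicas=3 and shuffle disabled, the repeated index list is
    # [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]. Rank 0 takes positions 0, 3, 6, 9 -> [0, 1, 2, 3],
    # rank 1 takes positions 1, 4, 7, 10 -> [0, 1, 2, 3], and likewise rank 2 (before the final
    # truncation to num_selected_samples), so each of the three repeats of a sample lands on a
    # different process and receives that process's own random augmentation.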

    def __len__(self):
        return self.num_selected_samples

    def set_epoch(self, epoch):
        self.epoch = epoch
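

# Usage sketch (illustrative only, not part of the original sampler): wiring RepeatAugSampler
# into a typical DDP training loop. `train_dataset`, `batch_size` and `num_epochs` are
# hypothetical placeholders.
#
#     sampler = RepeatAugSampler(train_dataset, num_repeats=3, shuffle=True)
#     loader = torch.utils.data.DataLoader(
#         train_dataset, batch_size=batch_size, sampler=sampler, drop_last=True)
#     for epoch in range(num_epochs):
#         sampler.set_epoch(epoch)  # re-seed the deterministic shuffle each epoch
#         for images, targets in loader:
#             ...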