#578 Feature/sg 516 support head replacement for local pretrained weights unknown dataset

Merged
Ghost merged 1 commits into Deci-AI:master from deci-ai:feature/SG-516_support_head_replacement_for_local_pretrained_weights_unknown_dataset
  
    
        
          
1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

          
            import csv
import numpy as np
import os
import os.path
from typing import Callable
from torchvision.datasets import VisionDataset
from torchvision.datasets.folder import default_loader
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')
class BaseSgVisionDataset(VisionDataset):
    """
    BaseSgVisionDataset
    """
    def __init__(self, root: str, sample_loader: Callable = default_loader, target_loader: Callable = None,
                 collate_fn: Callable = None, valid_sample_extensions: tuple = IMG_EXTENSIONS,
                 sample_transform: Callable = None, target_transform: Callable = None):
        """
        Ctor
            :param root:
            :param sample_loader:
            :param target_loader:
            :param collate_fn:
            :param valid_sample_extensions:
            :param sample_transform:
            :param target_transform:
        """
        super().__init__(root=root, transform=sample_transform, target_transform=target_transform)
        self.samples_targets_tuples_list = list(tuple())
        self.classes = []
        self.valid_sample_extensions = valid_sample_extensions
        self.sample_loader = sample_loader
        self.target_loader = target_loader
        self._generate_samples_and_targets()
        # IF collate_fn IS PROVIDED IN CTOR WE ASSUME THERE IS A BASE-CLASS INHERITANCE W/O collate_fn IMPLEMENTATION
        if collate_fn is not None:
            self.collate_fn = collate_fn
    def __getitem__(self, item):
        """
        :param item:
        :return:
        """
        raise NotImplementedError
    def __len__(self):
        """
        :return:
        """
        return len(self.samples_targets_tuples_list)
    def _generate_samples_and_targets(self):
        """
        _generate_samples_and_targets - An abstract method that fills the samples and targets members of the class
        """
        raise NotImplementedError
    def _validate_file(self, filename: str) -> bool:
        """
        validate_file
            :param filename:
            :return:
        """
        for valid_extension in self.valid_sample_extensions:
            if filename.lower().endswith(valid_extension):
                return True
        return False
    @staticmethod
    def numpy_loader_func(path):
        """
        _numpy_loader_func - Uses numpy load func
            :param path:
            :return:
        """
        return np.load(path)
    @staticmethod
    def text_file_loader_func(text_file_path: str, inline_splitter: str = ' ') -> list:
        """
        text_file_loader_func - Uses a line by line based code to get vectorized data from a text-based file
            :param text_file_path:  Input text file
            :param inline_splitter: The char to use in order to separate between different VALUES of the SAME vector
                                    please notice that DIFFERENT VECTORS SHOULD BE IN SEPARATE LINES ('\n') SEPARATED
            :return: a list of tuples, where each tuple is a vector of target values
        """
        if not os.path.isfile(text_file_path):
            raise ValueError(" Error in text file path")
        with open(text_file_path, "r", encoding="utf-8") as text_file:
            targets_list = [tuple(map(float, line.split(inline_splitter))) for line in text_file]
        return targets_list
class DirectoryDataSet(BaseSgVisionDataset):
    """
    DirectoryDataSet - A PyTorch Vision Data Set extension that receives a root Dir and two separate sub directories:
                        - Sub-Directory for Samples
                        - Sub-Directory for Targets
    """
    def __init__(self, root: str,
                 samples_sub_directory: str, targets_sub_directory: str, target_extension: str,
                 sample_loader: Callable = default_loader, target_loader: Callable = None, collate_fn: Callable = None,
                 sample_extensions: tuple = IMG_EXTENSIONS, sample_transform: Callable = None,
                 target_transform: Callable = None):
        """
        CTOR
            :param root:                    root directory that contains all of the Data Set
            :param samples_sub_directory:   name of the samples sub-directory
            :param targets_sub_directory:   name of the targets sub-directory
            :param sample_extensions:       file extensions for samples
            :param target_extension:        file extension of the targets
            :param sample_loader:           Func to load samples
            :param target_loader:           Func to load targets
            :param collate_fn:              collate_fn func to process batches for the Data Loader
            :param sample_transform:        Func to pre-process samples for data loading
            :param target_transform:        Func to pre-process targets for data loading
        """
        # INITIALIZING THE TARGETS LOADER TO USE THE TEXT FILE LOADER FUNC
        if target_loader is None:
            target_loader = self.text_file_loader_func
        self.target_extension = target_extension
        self.samples_dir_suffix = samples_sub_directory
        self.targets_dir_suffix = targets_sub_directory
        super().__init__(root=root, sample_loader=sample_loader, target_loader=target_loader,
                         collate_fn=collate_fn, valid_sample_extensions=sample_extensions,
                         sample_transform=sample_transform, target_transform=target_transform)
    def __getitem__(self, item):
        """
        getter method for iteration
            :param item:
            :return:
        """
        sample_path, target_path = self.samples_targets_tuples_list[item]
        sample = self.sample_loader(sample_path)
        target = self.target_loader(target_path)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, target
    def _generate_samples_and_targets(self):
        """
        _generate_samples_and_targets - Uses class built in members to generate the list of (SAMPLE, TARGET/S)
                                        that is saved in self.samples_targets_tuples_list
        """
        missing_sample_files, missing_target_files = 0, 0
        # VALIDATE DATA PATH
        samples_dir_path = self.root + os.path.sep + self.samples_dir_suffix
        targets_dir_path = self.root + os.path.sep + self.targets_dir_suffix
        if not os.path.exists(samples_dir_path) or not os.path.exists(targets_dir_path):
            raise ValueError(" Error in data path")
        # ITERATE OVER SAMPLES AND MAKE SURE THERE ARE MATCHING LABELS
        for sample_file_name in os.listdir(samples_dir_path):
            sample_file_path = samples_dir_path + os.path.sep + sample_file_name
            if os.path.isfile(sample_file_path) and self._validate_file(sample_file_path):
                sample_file_prefix = str(sample_file_name.split('.')[:-1][0])
                # TRY TO GET THE MATCHING LABEL
                matching_target_file_name = sample_file_prefix + self.target_extension
                target_file_path = targets_dir_path + os.path.sep + matching_target_file_name
                if os.path.isfile(target_file_path):
                    self.samples_targets_tuples_list.append((sample_file_path, target_file_path))
                else:
                    missing_target_files += 1
            else:
                missing_sample_files += 1
        for counter_name, missing_files_counter in [('samples', missing_sample_files),
                                                    ('targets', missing_target_files)]:
            if missing_files_counter > 0:
                print(__name__ + ' There are ' + str(missing_files_counter) + ' missing  ' + counter_name)
class ListDataset(BaseSgVisionDataset):
    """
    ListDataset - A PyTorch Vision Data Set extension that receives a file with FULL PATH to each of the samples.
                  Then, the assumption is that for every sample, there is a * matching target * in the same
                  path but with a different extension, i.e:
                        for the samples paths:  (That appear in the list file)
                                                    /root/dataset/class_x/sample1.png
                                                    /root/dataset/class_y/sample123.png
                        the matching labels paths:  (That DO NOT appear in the list file)
                                                    /root/dataset/class_x/sample1.ext
                                                    /root/dataset/class_y/sample123.ext
    """
    def __init__(self, root, file, sample_loader: Callable = default_loader, target_loader: Callable = None,
                 collate_fn: Callable = None, sample_extensions: tuple = IMG_EXTENSIONS,
                 sample_transform: Callable = None, target_transform: Callable = None, target_extension='.npy'):
        """
           CTOR
               :param root:                    root directory that contains all of the Data Set
               :param file:                    Path to the file with the samples list
               :param sample_extensions:       file extension for samples
               :param target_extension:        file extension of the targets
               :param sample_loader:           Func to load samples
               :param target_loader:           Func to load targets
               :param collate_fn:              collate_fn func to process batches for the Data Loader
               :param sample_transform:        Func to pre-process samples for data loading
               :param target_transform:        Func to pre-process targets for data loading
           """
        if target_loader is None:
            target_loader = self.numpy_loader_func
        self.list_file_path = file
        self.loader = sample_loader
        self.target_loader = target_loader
        self.extensions = sample_extensions
        self.target_extension = target_extension
        super().__init__(root, sample_loader=sample_loader, target_loader=target_loader,
                         collate_fn=collate_fn, sample_transform=sample_transform,
                         valid_sample_extensions=sample_extensions,
                         target_transform=target_transform)
    def __getitem__(self, item):
        """
        Args:
            item (int): Index
        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        sample_path, target_path = self.samples_targets_tuples_list[item]
        sample = self.loader(sample_path)
        target = self.target_loader(target_path)[0]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, target
    def _generate_samples_and_targets(self):
        """
        _generate_samples_and_targets
        """
        file = open(self.root + os.path.sep + self.list_file_path, "r", encoding="utf-8")
        reader = csv.reader(file)
        data = [row[0] for row in reader]
        for f in data:
            path = self.root + os.path.sep + f
            target_path = path[:-4] + self.target_extension
            if self._validate_file(path) and os.path.exists(target_path):
                self.samples_targets_tuples_list.append((path, target_path))

          
        
      

  
Tip!
Press p or to see the previous file or, n or to see the next file
Deci-AI / super-gradients connected to https://github.com/Deci-AI/super-gradients.git

#578 Feature/sg 516 support head replacement for local pretrained weights unknown dataset

Deci-AI
/
super-gradients
connected to https://github.com/Deci-AI/super-gradients.git