Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

#578 Feature/sg 516 support head replacement for local pretrained weights unknown dataset

Merged
Ghost merged 1 commits into Deci-AI:master from deci-ai:feature/SG-516_support_head_replacement_for_local_pretrained_weights_unknown_dataset
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
  1. import csv
  2. import numpy as np
  3. import os
  4. import os.path
  5. from typing import Callable
  6. from torchvision.datasets import VisionDataset
  7. from torchvision.datasets.folder import default_loader
  8. IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')
  9. class BaseSgVisionDataset(VisionDataset):
  10. """
  11. BaseSgVisionDataset
  12. """
  13. def __init__(self, root: str, sample_loader: Callable = default_loader, target_loader: Callable = None,
  14. collate_fn: Callable = None, valid_sample_extensions: tuple = IMG_EXTENSIONS,
  15. sample_transform: Callable = None, target_transform: Callable = None):
  16. """
  17. Ctor
  18. :param root:
  19. :param sample_loader:
  20. :param target_loader:
  21. :param collate_fn:
  22. :param valid_sample_extensions:
  23. :param sample_transform:
  24. :param target_transform:
  25. """
  26. super().__init__(root=root, transform=sample_transform, target_transform=target_transform)
  27. self.samples_targets_tuples_list = list(tuple())
  28. self.classes = []
  29. self.valid_sample_extensions = valid_sample_extensions
  30. self.sample_loader = sample_loader
  31. self.target_loader = target_loader
  32. self._generate_samples_and_targets()
  33. # IF collate_fn IS PROVIDED IN CTOR WE ASSUME THERE IS A BASE-CLASS INHERITANCE W/O collate_fn IMPLEMENTATION
  34. if collate_fn is not None:
  35. self.collate_fn = collate_fn
  36. def __getitem__(self, item):
  37. """
  38. :param item:
  39. :return:
  40. """
  41. raise NotImplementedError
  42. def __len__(self):
  43. """
  44. :return:
  45. """
  46. return len(self.samples_targets_tuples_list)
  47. def _generate_samples_and_targets(self):
  48. """
  49. _generate_samples_and_targets - An abstract method that fills the samples and targets members of the class
  50. """
  51. raise NotImplementedError
  52. def _validate_file(self, filename: str) -> bool:
  53. """
  54. validate_file
  55. :param filename:
  56. :return:
  57. """
  58. for valid_extension in self.valid_sample_extensions:
  59. if filename.lower().endswith(valid_extension):
  60. return True
  61. return False
  62. @staticmethod
  63. def numpy_loader_func(path):
  64. """
  65. _numpy_loader_func - Uses numpy load func
  66. :param path:
  67. :return:
  68. """
  69. return np.load(path)
  70. @staticmethod
  71. def text_file_loader_func(text_file_path: str, inline_splitter: str = ' ') -> list:
  72. """
  73. text_file_loader_func - Uses a line by line based code to get vectorized data from a text-based file
  74. :param text_file_path: Input text file
  75. :param inline_splitter: The char to use in order to separate between different VALUES of the SAME vector
  76. please notice that DIFFERENT VECTORS SHOULD BE IN SEPARATE LINES ('\n') SEPARATED
  77. :return: a list of tuples, where each tuple is a vector of target values
  78. """
  79. if not os.path.isfile(text_file_path):
  80. raise ValueError(" Error in text file path")
  81. with open(text_file_path, "r", encoding="utf-8") as text_file:
  82. targets_list = [tuple(map(float, line.split(inline_splitter))) for line in text_file]
  83. return targets_list
  84. class DirectoryDataSet(BaseSgVisionDataset):
  85. """
  86. DirectoryDataSet - A PyTorch Vision Data Set extension that receives a root Dir and two separate sub directories:
  87. - Sub-Directory for Samples
  88. - Sub-Directory for Targets
  89. """
  90. def __init__(self, root: str,
  91. samples_sub_directory: str, targets_sub_directory: str, target_extension: str,
  92. sample_loader: Callable = default_loader, target_loader: Callable = None, collate_fn: Callable = None,
  93. sample_extensions: tuple = IMG_EXTENSIONS, sample_transform: Callable = None,
  94. target_transform: Callable = None):
  95. """
  96. CTOR
  97. :param root: root directory that contains all of the Data Set
  98. :param samples_sub_directory: name of the samples sub-directory
  99. :param targets_sub_directory: name of the targets sub-directory
  100. :param sample_extensions: file extensions for samples
  101. :param target_extension: file extension of the targets
  102. :param sample_loader: Func to load samples
  103. :param target_loader: Func to load targets
  104. :param collate_fn: collate_fn func to process batches for the Data Loader
  105. :param sample_transform: Func to pre-process samples for data loading
  106. :param target_transform: Func to pre-process targets for data loading
  107. """
  108. # INITIALIZING THE TARGETS LOADER TO USE THE TEXT FILE LOADER FUNC
  109. if target_loader is None:
  110. target_loader = self.text_file_loader_func
  111. self.target_extension = target_extension
  112. self.samples_dir_suffix = samples_sub_directory
  113. self.targets_dir_suffix = targets_sub_directory
  114. super().__init__(root=root, sample_loader=sample_loader, target_loader=target_loader,
  115. collate_fn=collate_fn, valid_sample_extensions=sample_extensions,
  116. sample_transform=sample_transform, target_transform=target_transform)
  117. def __getitem__(self, item):
  118. """
  119. getter method for iteration
  120. :param item:
  121. :return:
  122. """
  123. sample_path, target_path = self.samples_targets_tuples_list[item]
  124. sample = self.sample_loader(sample_path)
  125. target = self.target_loader(target_path)
  126. if self.transform is not None:
  127. sample = self.transform(sample)
  128. if self.target_transform is not None:
  129. target = self.target_transform(target)
  130. return sample, target
  131. def _generate_samples_and_targets(self):
  132. """
  133. _generate_samples_and_targets - Uses class built in members to generate the list of (SAMPLE, TARGET/S)
  134. that is saved in self.samples_targets_tuples_list
  135. """
  136. missing_sample_files, missing_target_files = 0, 0
  137. # VALIDATE DATA PATH
  138. samples_dir_path = self.root + os.path.sep + self.samples_dir_suffix
  139. targets_dir_path = self.root + os.path.sep + self.targets_dir_suffix
  140. if not os.path.exists(samples_dir_path) or not os.path.exists(targets_dir_path):
  141. raise ValueError(" Error in data path")
  142. # ITERATE OVER SAMPLES AND MAKE SURE THERE ARE MATCHING LABELS
  143. for sample_file_name in os.listdir(samples_dir_path):
  144. sample_file_path = samples_dir_path + os.path.sep + sample_file_name
  145. if os.path.isfile(sample_file_path) and self._validate_file(sample_file_path):
  146. sample_file_prefix = str(sample_file_name.split('.')[:-1][0])
  147. # TRY TO GET THE MATCHING LABEL
  148. matching_target_file_name = sample_file_prefix + self.target_extension
  149. target_file_path = targets_dir_path + os.path.sep + matching_target_file_name
  150. if os.path.isfile(target_file_path):
  151. self.samples_targets_tuples_list.append((sample_file_path, target_file_path))
  152. else:
  153. missing_target_files += 1
  154. else:
  155. missing_sample_files += 1
  156. for counter_name, missing_files_counter in [('samples', missing_sample_files),
  157. ('targets', missing_target_files)]:
  158. if missing_files_counter > 0:
  159. print(__name__ + ' There are ' + str(missing_files_counter) + ' missing ' + counter_name)
  160. class ListDataset(BaseSgVisionDataset):
  161. """
  162. ListDataset - A PyTorch Vision Data Set extension that receives a file with FULL PATH to each of the samples.
  163. Then, the assumption is that for every sample, there is a * matching target * in the same
  164. path but with a different extension, i.e:
  165. for the samples paths: (That appear in the list file)
  166. /root/dataset/class_x/sample1.png
  167. /root/dataset/class_y/sample123.png
  168. the matching labels paths: (That DO NOT appear in the list file)
  169. /root/dataset/class_x/sample1.ext
  170. /root/dataset/class_y/sample123.ext
  171. """
  172. def __init__(self, root, file, sample_loader: Callable = default_loader, target_loader: Callable = None,
  173. collate_fn: Callable = None, sample_extensions: tuple = IMG_EXTENSIONS,
  174. sample_transform: Callable = None, target_transform: Callable = None, target_extension='.npy'):
  175. """
  176. CTOR
  177. :param root: root directory that contains all of the Data Set
  178. :param file: Path to the file with the samples list
  179. :param sample_extensions: file extension for samples
  180. :param target_extension: file extension of the targets
  181. :param sample_loader: Func to load samples
  182. :param target_loader: Func to load targets
  183. :param collate_fn: collate_fn func to process batches for the Data Loader
  184. :param sample_transform: Func to pre-process samples for data loading
  185. :param target_transform: Func to pre-process targets for data loading
  186. """
  187. if target_loader is None:
  188. target_loader = self.numpy_loader_func
  189. self.list_file_path = file
  190. self.loader = sample_loader
  191. self.target_loader = target_loader
  192. self.extensions = sample_extensions
  193. self.target_extension = target_extension
  194. super().__init__(root, sample_loader=sample_loader, target_loader=target_loader,
  195. collate_fn=collate_fn, sample_transform=sample_transform,
  196. valid_sample_extensions=sample_extensions,
  197. target_transform=target_transform)
  198. def __getitem__(self, item):
  199. """
  200. Args:
  201. item (int): Index
  202. Returns:
  203. tuple: (sample, target) where target is class_index of the target class.
  204. """
  205. sample_path, target_path = self.samples_targets_tuples_list[item]
  206. sample = self.loader(sample_path)
  207. target = self.target_loader(target_path)[0]
  208. if self.transform is not None:
  209. sample = self.transform(sample)
  210. if self.target_transform is not None:
  211. target = self.target_transform(target)
  212. return sample, target
  213. def _generate_samples_and_targets(self):
  214. """
  215. _generate_samples_and_targets
  216. """
  217. file = open(self.root + os.path.sep + self.list_file_path, "r", encoding="utf-8")
  218. reader = csv.reader(file)
  219. data = [row[0] for row in reader]
  220. for f in data:
  221. path = self.root + os.path.sep + f
  222. target_path = path[:-4] + self.target_extension
  223. if self._validate_file(path) and os.path.exists(target_path):
  224. self.samples_targets_tuples_list.append((path, target_path))
Discard
Tip!

Press p to see the previous file, or n to see the next file