Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

make_dataset.py 2.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  1. import os
  2. import sys
  3. import yaml
  4. import json
  5. from utils.dataset_config import dataset_config
  6. from utils.misc import seed_everything
  7. '''
  8. Main function of this script. It is explained in more detail in the tutorial.
  9. '''
  10. def run(parameters, directories):
  11. #Retrieving directories information
  12. data_path = directories['data_path']
  13. train_test_path = directories['train_test_path']
  14. csv_path = directories['csv_path']
  15. train_dir = os.path.join(train_test_path, "train")
  16. val_dir = os.path.join(train_test_path, "val")
  17. test_dir = os.path.join(train_test_path, "test")
  18. #Retrieving parameters
  19. seed = parameters['seed']
  20. val_size = parameters['val_size']
  21. test_size = parameters['test_size']
  22. #Setting random processes seed to assure reproducibility
  23. seed_everything(seed)
  24. #Gathering dataset's images information from their folder
  25. dataset = dataset_config(dir=data_path)
  26. #Getting train and test images' ids
  27. if os.path.isdir(train_dir) and os.path.isdir(val_dir) and os.path.isdir(test_dir):
  28. #In the case that the dataset has already been split
  29. train_ids, val_ids, test_ids = dataset.load(train_test_path)
  30. else:
  31. #In the case that the dataset has not yet been split
  32. train_ids, val_ids, test_ids = dataset.train_split(path=train_test_path, csv_path=csv_path, val_size=val_size, test_size=test_size)
  33. ids_dict = {'train_ids': train_ids, 'val_ids': val_ids, 'test_ids': test_ids}
  34. with open('ids_dict.json', 'w') as fp:
  35. json.dump(ids_dict, fp)
  36. if __name__ == "__main__":
  37. #Attempting to read dir.yaml file that contains directories information
  38. try:
  39. with open(os.path.join("configs","dir.yaml"), "r") as fp:
  40. directories = yaml.load(fp, Loader=yaml.FullLoader)
  41. except:
  42. sys.exit('File dir.yaml does not exist in the current folder')
  43. #Attempting to read parameters.yaml file that contains parameters information
  44. try:
  45. with open(os.path.join("configs","parameters.yaml"), "r") as fp:
  46. parameters = yaml.load(fp, Loader=yaml.FullLoader)
  47. except:
  48. sys.exit('File parameters.yaml does not exist in the current folder')
  49. run(parameters, directories)
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...