Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

train_valid_test_split.py 1.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Streaming hooks must be installed BEFORE the other dagshub imports so that
# filesystem access is transparently routed through DagsHub streaming.
from dagshub.streaming import install_hooks
install_hooks()
import dagshub
from dagshub.data_engine import datasources
import logging
from utils.data import create_splits

# Root logger at INFO so the progress messages in main() are visible.
logger = logging.getLogger('root')
logger.setLevel(logging.INFO)

# Environment Variables
# Repo coordinates and Data Engine datasource configuration for the
# tooth-segmentation dataset hosted on S3.
DAGSHUB_REPO_OWNER = "yonomitt"
DAGSHUB_REPO="ToothFairy"
DAGSHUB_FULL_REPO=DAGSHUB_REPO_OWNER + "/" + DAGSHUB_REPO
DATASOURCE_NAME = "Tooth-Segmentation"
DATASOURCE_PATH = "s3://tooth-dataset/data"
# NOTE(review): ANNOTATION_FILE is defined but not referenced in this file —
# presumably consumed elsewhere or by a later step; verify before removing.
ANNOTATION_FILE = "s3://tooth-dataset/tooth_segmentation.json"
  16. def get_or_create_datasource(name):
  17. try:
  18. ds = datasources.get_datasource(repo=DAGSHUB_FULL_REPO, name=name)
  19. except:
  20. ds = datasources.create(repo=DAGSHUB_FULL_REPO, name=name, path=DATASOURCE_PATH)
  21. return ds
  22. def main():
  23. logger.info('Getting or creating the datasource')
  24. ds = get_or_create_datasource(DATASOURCE_NAME)
  25. logger.info('Create train/valid/test splits')
  26. md = ds.all().dataframe
  27. md = create_splits(md)
  28. # Upload the metadata
  29. logger.info('Uploading the metadata to Data Engine')
  30. dagshub.common.config.dataengine_metadata_upload_batch_size = 50
  31. ds.upload_metadata_from_dataframe(md, path_column="path")
  32. if __name__ == '__main__':
  33. main()
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...