Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

enrich_metadata.py 1.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  1. from dagshub.streaming import install_hooks
  2. install_hooks()
  3. import dagshub
  4. from dagshub.data_engine import datasources
  5. import logging
  6. from utils.data import DataFunctions
  # Logger used by this script; its level is set to INFO so that info-level
  # records pass this logger's own level check.
  logger = logging.getLogger('root')
  logger.setLevel(logging.INFO)

  # Hard-coded project configuration (plain literals, not read from the
  # process environment).
  DAGSHUB_REPO_OWNER = "yonomitt"
  DAGSHUB_REPO = "ToothFairy"
  # "<owner>/<repo>" identifier passed as the `repo` argument below.
  DAGSHUB_FULL_REPO = f"{DAGSHUB_REPO_OWNER}/{DAGSHUB_REPO}"

  # Name of the datasource and the bucket path used when it has to be created.
  DATASOURCE_NAME = "Tooth-Segmentation"
  DATASOURCE_PATH = "s3://tooth-dataset/data"

  # Annotation file handed to DataFunctions.
  ANNOTATION_FILE = "s3://tooth-dataset/tooth_segmentation.json"
  def get_or_create_datasource(name):
      """Return the datasource called *name*, creating it if the lookup fails.

      The datasource is looked up in the ``DAGSHUB_FULL_REPO`` repository. If
      the lookup raises, a new datasource with the same name is created in
      that repository, pointing at ``DATASOURCE_PATH``.

      Args:
          name: Name of the datasource to fetch or create.

      Returns:
          The object returned by ``datasources.get_datasource`` or, on the
          fallback path, by ``datasources.create``.

      Raises:
          Whatever ``datasources.create`` raises if the fallback also fails.
      """
      try:
          return datasources.get_datasource(repo=DAGSHUB_FULL_REPO, name=name)
      except Exception as exc:
          # `except Exception` (rather than a bare `except:`) keeps the
          # best-effort fallback but lets KeyboardInterrupt / SystemExit
          # propagate instead of triggering an unintended create call.
          logger.info('Could not get datasource %r (%s); creating it', name, exc)
          return datasources.create(
              repo=DAGSHUB_FULL_REPO, name=name, path=DATASOURCE_PATH
          )
  def main():
      """Enrich the datasource's metadata and upload it back to Data Engine.

      Steps, in order:
        1. Fetch (or create) the ``DATASOURCE_NAME`` datasource.
        2. Build a ``DataFunctions`` helper from ``ANNOTATION_FILE``.
        3. Query every entry of the datasource and take its dataframe.
        4. Call ``DataFunctions.create_metadata`` through ``apply(axis=1)``
           (one call per row, assuming a pandas DataFrame as the log
           message states).
        5. Upload the result, keyed by the ``"path"`` column.
      """
      logger.info('Getting or creating the datasource')
      datasource = get_or_create_datasource(DATASOURCE_NAME)
      converter = DataFunctions(
          annotation_file=ANNOTATION_FILE,
          yolo_dir='yolo_data',
          label_type='segmentation',
      )

      logger.info('Converting the datasource to a pandas Dataframe')
      query_result = datasource.all()
      metadata = query_result.dataframe

      logger.info('Enriching the metadata')
      enriched = metadata.apply(converter.create_metadata, axis=1)

      logger.info('Uploading the metadata to Data Engine')
      # Set the client's metadata upload batch size before uploading.
      dagshub.common.config.dataengine_metadata_upload_batch_size = 50
      datasource.upload_metadata_from_dataframe(enriched, path_column="path")


  # Run the pipeline only when executed as a script, not on import.
  if __name__ == '__main__':
      main()
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...