Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

utils.py 2.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
  1. import git
  2. import os
  3. from dagshub.streaming import DagsHubFilesystem
  4. from dagshub.upload import Repo
  5. from dagshub_config import *
  6. from config import *
  7. import pandas as pd
  8. import numpy as np
  9. from sklearn.model_selection import train_test_split
  10. import cv2
  11. ### Clone the Git Repo - read the necessary info from the Dagshub_config.py file
  12. def gitclone():
  13. git.Git().clone("https://"+DAGSHUB_USERNAME+":"+DAGSHUB_TOKEN+"@dagshub.com/"+DAGSHUB_USERNAME+"/"+DAGSHUB_REPO_NAME+".git")
  14. ### Create a Streaming Client
  15. def create_streaming_client():
  16. fs = DagsHubFilesystem(project_root=DAGSHUB_REPO_NAME,username=DAGSHUB_USERNAME,password=DAGSHUB_TOKEN)
  17. return fs
  18. ### Getting the list of Image Files
  19. def list_files(fs,folder_path,file_ext="png"):
  20. img_files=fs.listdir(folder_path)
  21. img_files=[__file__ for __file__ in img_files if "."+file_ext in __file__]
  22. print("Number of Image Files in the Path",len(img_files))
  23. return img_files
  24. ### Reading the Training DataFrame
  25. def get_train_dataframe(fs,file_name):
  26. file_path=os.path.join(RAW_DATA_PATH,file_name)
  27. ## This will create a cache of the file to be
  28. train_data=pd.read_csv(fs.open(file_path))
  29. train_data['int_labels']=train_data['label'].apply(lambda x:0 if x=="CE" else 1)
  30. return train_data
  31. ### Splitting the Training Data Frame into train and validation
  32. def train_split(train_data,train_split=0.8):
  33. X=train_data['image_id']
  34. y=train_data['int_labels'].tolist()
  35. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_split, random_state=42,stratify=y)
  36. train_images=X_train.tolist()
  37. train_data['is_train']=train_data['image_id'].apply(lambda x: "train" if x in train_images else "val")
  38. return train_data
  39. ## Download the efficient data using Streaming Client
  40. def download_EfficientNet(fs,model_filename):
  41. fs.open(os.path.join(EFFICIENT_NET_MODEL_PATH,model_filename)) ##This will download the model files into local system
  42. return os.path.join(EFFICIENT_NET_MODEL_PATH,model_filename)
  43. ## Read the Images using Streaming Client
  44. def read_images(fs,image_path):
  45. fs.open(image_path)
  46. img=cv2.imread(image_path)
  47. return img
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...