Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

train_test_split.py 1.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
  1. import os
  2. import pandas as pd
  3. import plac
  4. from sklearn.model_selection import train_test_split
  5. @plac.annotations(
  6. data_path=("Path to source data", "option", "i", str),
  7. out_path=("Path to save split data", "option", "o", str),
  8. )
  9. def main(data_path: str = "data/iris.csv", out_path: str = "data/split/") -> None:
  10. df = pd.read_csv(data_path)
  11. train, test = train_test_split(
  12. df, stratify=df["class"].values, test_size=0.2, random_state=42
  13. )
  14. if not os.path.isdir(out_path):
  15. os.mkdir(out_path)
  16. train.to_csv(f"{out_path}train.csv", index=False)
  17. test.to_csv(f"{out_path}test.csv", index=False)
  18. print("Finished Splitting Data:\nStats:")
  19. print(
  20. f'\tTotal: {df.shape}\tClass vise samples: {df["class"].value_counts().values}'
  21. )
  22. print(
  23. f'\tTrain: {train.shape}\tClass vise samples: {train["class"].value_counts().values}'
  24. )
  25. print(
  26. f'\tTest: {test.shape}\tClass vise samples: {test["class"].value_counts().values}'
  27. )
  28. if __name__ == "__main__":
  29. plac.call(main)
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...