Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

train_test_split.py 979 B

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
  import os

  import pandas as pd
  import plac
  from sklearn.model_selection import train_test_split


  @plac.annotations(
      data_path=("Path to source data", "option", "i", str),
      out_path=("Path to save split data", "option", "o", str),
  )
  def main(data_path='data/iris.csv', out_path='data/split/'):
      """Split a CSV dataset into stratified train/test CSV files.

      The source CSV must contain a ``class`` column; it is used to stratify
      the split so both subsets keep the class proportions of the input.
      20% of the rows go to the test set and the split is seeded
      (``random_state=42``) so repeated runs produce the same files.

      Args:
          data_path: Path of the CSV file to read.
          out_path: Directory that receives ``train.csv`` and ``test.csv``.
              It is created (including missing parent directories) when it
              does not exist; a trailing path separator is optional.
      """
      df = pd.read_csv(data_path)
      train, test = train_test_split(
          df,
          stratify=df['class'].values,
          test_size=0.2,
          random_state=42,
      )

      # makedirs(exist_ok=True) also creates missing parents and avoids the
      # check-then-create race of an isdir() test followed by mkdir().
      os.makedirs(out_path, exist_ok=True)

      # os.path.join keeps the file inside out_path whether or not the caller
      # supplied a trailing separator ('data/split' vs 'data/split/').
      train.to_csv(os.path.join(out_path, 'train.csv'), index=False)
      test.to_csv(os.path.join(out_path, 'test.csv'), index=False)

      print(f'Finished Splitting Data:\nStats:')
      print(f'\tTotal: {df.shape}\tClass vise samples: {df["class"].value_counts().values}')
      print(f'\tTrain: {train.shape}\tClass vise samples: {train["class"].value_counts().values}')
      print(f'\tTest: {test.shape}\tClass vise samples: {test["class"].value_counts().values}')


  if __name__ == '__main__':
      plac.call(main)
Tip!

Press p or the left arrow key to see the previous file, or n or the right arrow key to see the next file

Comments

Loading...