Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

random_forrest.py 1.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
  1. import _pickle as cpickle
  2. import os
  3. import matplotlib.pyplot as plt
  4. import pandas as pd
  5. import plac
  6. from dagshub import dagshub_logger
  7. from sklearn.ensemble import RandomForestClassifier
  8. from sklearn.metrics import plot_confusion_matrix
  9. @plac.annotations(
  10. data_path=("Path to source data", "option", "i", str),
  11. n_estimators=("Path to save trained Model", "option", "e", str),
  12. max_samples=("Path to save trained Model", "option", "s", str),
  13. out_path=("Path to save trained Model", "option", "o", str)
  14. )
  15. def main(data_path='data/features/', out_path='data/models/r_forrest/', n_estimators=10, max_samples=30):
  16. train = pd.read_csv(f'{data_path}train.csv')
  17. test = pd.read_csv(f'{data_path}test.csv')
  18. X_train, y_train = train.drop(columns=['class']), train['class']
  19. X_test, y_test = test.drop(columns=['class']), test['class']
  20. model = RandomForestClassifier(n_estimators=n_estimators, max_samples=max_samples, n_jobs=4)
  21. model.fit(X_train, y_train)
  22. if not os.path.isdir(out_path):
  23. os.makedirs(out_path)
  24. with open(f'{out_path}model.pkl', 'wb+') as fp:
  25. cpickle.dump(model, fp)
  26. cmd = plot_confusion_matrix(model, X_test, y_test, cmap=plt.cm.Reds)
  27. cmd.figure_.savefig(f'{out_path}confusion_matrix.svg', format='svg')
  28. c_matrix = cmd.confusion_matrix
  29. accuracy = model.score(X_test, y_test)
  30. print(f'Finished Training RandomForrest Model:\nStats:')
  31. print(f'\tConfusion Matrix:\n{c_matrix}')
  32. print(f'\tModel Accuracy: {accuracy}')
  33. with dagshub_logger(metrics_path=f'{out_path}metrics.csv', hparams_path=f'{out_path}params.yml') as logger:
  34. logger.log_hyperparams(model.get_params())
  35. logger.log_metrics(accuracy=accuracy)
  36. if __name__ == '__main__':
  37. plac.call(main)
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...