Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

ensemble.py 1.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
  1. import os
  2. import matplotlib.pyplot as plt
  3. import pandas as pd
  4. import plac
  5. from dagshub import dagshub_logger
  6. from joblib import dump, load
  7. from sklearn.ensemble import VotingClassifier
  8. from sklearn.metrics import plot_confusion_matrix
  @plac.annotations(
      data_path=("Path to source data", "option", "i", str),
      model_path=("Path to load trained base models from", "option", "m", str),
      out_path=("Path to save trained Model", "option", "o", str)
  )
  def main(data_path='data/features/', model_path='data/models/', out_path='data/models/ensemble/'):
      """Train a voting ensemble from three saved classifiers and record its results.

      Reads ``train.csv`` and ``test.csv`` from *data_path*, loads the
      ``logistic``, ``svc`` and ``r_forrest`` models found under *model_path*,
      fits a ``VotingClassifier`` over them on the training split, then writes
      the fitted model, a confusion-matrix SVG and the DagsHub metric and
      hyper-parameter files into *out_path*.

      Directory arguments are accepted with or without a trailing separator.

      Args:
          data_path: Directory holding ``train.csv`` and ``test.csv``; the
              ``class`` column of each file is used as the target.
          model_path: Directory with one sub-directory per base model, each
              containing a ``model.pkl``.
          out_path: Directory that receives the outputs; created if missing.
      """
      train = pd.read_csv(os.path.join(data_path, 'train.csv'))
      test = pd.read_csv(os.path.join(data_path, 'test.csv'))

      X_train, y_train = train.drop(columns=['class']), train['class']
      X_test, y_test = test.drop(columns=['class']), test['class']

      # Joining with '' appends a separator only when one is missing, so the
      # loader receives a well-formed directory prefix either way.
      model_dir = os.path.join(model_path, '')
      cl1 = load_model(model_dir, 'logistic')
      cl2 = load_model(model_dir, 'svc')
      cl3 = load_model(model_dir, 'r_forrest')

      estimators = [
          ('l_regression', cl1),
          ('l_svc', cl2),
          ('r_forrest', cl3),
      ]

      model = VotingClassifier(estimators)
      model.fit(X_train, y_train)

      # exist_ok avoids the check-then-create race of a separate isdir() test.
      os.makedirs(out_path, exist_ok=True)
      dump(model, os.path.join(out_path, 'model.pkl'))

      # NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0
      # and removed in 1.2. Moving to ConfusionMatrixDisplay.from_estimator
      # also requires changing the file-level import, so it is left as is here.
      cmd = plot_confusion_matrix(model, X_test, y_test, cmap=plt.cm.Reds)
      cmd.figure_.savefig(os.path.join(out_path, 'confusion_matrix.svg'), format='svg')
      c_matrix = cmd.confusion_matrix

      accuracy = model.score(X_test, y_test)

      print('Finished Training Ensemble Model:\nStats:')
      print(f'\tConfusion Matrix:\n{c_matrix}')
      print(f'\tModel Accuracy: {accuracy}')

      with dagshub_logger(metrics_path=os.path.join(out_path, 'theBestMetric.csv'),
                          hparams_path=os.path.join(out_path, 'theBestParams.yaml')) as logger:
          logger.log_hyperparams(voting=model.voting)
          logger.log_metrics(accuracy=accuracy)
  def load_model(model_path, model_name):
      """Load the model stored at ``<model_path>/<model_name>/model.pkl``.

      Args:
          model_path: Directory that contains one sub-directory per model.
              A trailing path separator is optional.
          model_name: Name of the sub-directory holding ``model.pkl``.

      Returns:
          Whatever object ``joblib.load`` reconstructs from the file.
      """
      # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
      # only point this at model files from a trusted source.
      return load(os.path.join(model_path, model_name, 'model.pkl'))
  # Script entry point: plac turns main()'s annotated parameters into
  # command-line options and invokes it with the parsed values.
  if __name__ == "__main__":
      plac.call(main)
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...