Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

3-train.py 3.8 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author: Jessica Cabral
  4. """
  5. ############################################################################
  6. # Train Script
  7. ############################################################################
  8. import sys
  9. import os
  10. import numpy as np
  11. import pandas as pd
  12. from datetime import datetime
  13. from sklearn.model_selection import train_test_split
  14. from sklearn.ensemble import RandomForestRegressor
  15. from sklearn.linear_model import LinearRegression
  16. from sklearn.linear_model import Ridge
  17. from sklearn.linear_model import Lasso
  18. from sklearn.linear_model import ElasticNet
  19. from sklearn.tree import DecisionTreeRegressor
  20. from sklearn.svm import SVR # Support Vector Regression
  21. from sklearn.model_selection import cross_validate
  22. try:
  23. import cPickle as pickle
  24. except ImportError:
  25. import pickle
  26. ##############################
  27. # Get Sys Paths
  28. ##############################
  29. #if len(sys.argv) != 3:
  30. # sys.stderr.write('Arguments error. Usage:\n')
  31. # sys.stderr.write('\tpython train.py data/processed models\n')
  32. # sys.exit(1)
  33. PATH_PROCESSED_DATA = sys.argv[1] #'../data/processed'
  34. PATH_MODEL = sys.argv[2] #'../models'
  35. if not os.path.exists(PATH_MODEL):
  36. os.makedirs(PATH_MODEL)
  37. # Test data set split ratio
  38. split = 0.33
  39. seed = 201909
  40. ###############################
  41. ## Create Folder model version
  42. ###############################
  43. #try:
  44. # model_version = len(next(os.walk(PATH_MODEL))[1])
  45. # if model_version is None:
  46. # model_version = 0
  47. #except StopIteration :
  48. # model_version = 0
  49. #
  50. #model_version_name = 'modelv{}-{}'.format(model_version+1, datetime.now().strftime("%d%m%Y-%H%M%S"))
  51. #model_version_path = r'{}/{}'.format(PATH_MODEL,model_version_name)
  52. #
  53. #if not os.path.exists(model_version_path):
  54. # os.makedirs(model_version_path)
  55. #
  56. #print('Versao do modelo: {}'.format(model_version_name))
  57. ##############################
  58. # Features and Target
  59. ##############################
  60. bike_processed = pd.read_csv(PATH_PROCESSED_DATA+'/bikes_processed.tsv', sep='\t')
  61. X = bike_processed[bike_processed.columns.difference(['cnt', 'dteday'])].values
  62. y = bike_processed['cnt'].values
  63. ##############################
  64. # Train Test Split
  65. ##############################
  66. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=seed)
  67. X_train_final = pd.DataFrame(X_train)
  68. X_train_final['target'] = y_train
  69. X_test_final = pd.DataFrame(X_test)
  70. X_test_final['target'] = y_test
  71. #with open(os.path.join(model_version_path, 'train.tsv'), 'w', encoding='utf8') as fd_out_train:
  72. with open(os.path.join(PATH_MODEL, 'train.tsv'), 'w', encoding='utf8') as fd_out_train:
  73. fd_out_train.write(X_train_final.to_csv(sep='\t', index=False))
  74. #with open(os.path.join(model_version_path, 'test.tsv'), 'w', encoding='utf8') as fd_out_train:
  75. with open(os.path.join(PATH_MODEL, 'test.tsv'), 'w', encoding='utf8') as fd_out_train:
  76. fd_out_train.write(X_test_final.to_csv(sep='\t', index=False))
  77. ##############################
  78. # Train Model
  79. ##############################
  80. print('Input matrix size {}'.format(bike_processed.shape))
  81. print('X_train_final matrix size {}'.format(X_train_final.shape))
  82. print('X_test_final matrix size {}'.format(X_test_final.shape))
  83. X = X_train_final[X_train_final.columns.difference(['target'])].values
  84. y= X_train_final['target'].values
  85. regr = RandomForestRegressor(max_depth=2, n_estimators=100, random_state=seed)
  86. regr.fit(X, y)
  87. print('Modelo treinado com sucesso!')
  88. ##############################
  89. # Save Model
  90. ##############################
  91. #with open(os.path.join(model_version_path, '{}.pkl'.format(model_version_name)), 'wb') as fd:
  92. with open(os.path.join(PATH_MODEL, 'model.pkl'), 'wb') as fd:
  93. pickle.dump(regr, fd)
  94. #print('Modelo salvo em: {}'.format(model_version_path, '{}.pkl'.format(model_version_name)))
  95. print('Modelo salvo em: {}'.format(PATH_MODEL, 'model.pkl'))
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...