regression-nn.py 4.6 KB

#%%
import pandas as pd
import numpy as np
#%%
automobile_data = pd.read_csv('./data/Automobile_data.csv', sep=r'\s*,\s*', engine='python')
#%%
automobile_data.head()
#%%
automobile_data = automobile_data.replace('?', np.nan)
automobile_data.head()
#%%
automobile_data = automobile_data.dropna() # clean out the rows with missing data
automobile_data.head()
#%%
col = ['make', 'fuel-type', 'body-style', 'horsepower']
automobile_features = automobile_data[col]
automobile_features.head()
#%%
automobile_target = automobile_data[['price']]
automobile_target.head()
#%%
automobile_features['horsepower'].describe() # dtype: object (because the values are strings)
#%%
pd.options.mode.chained_assignment = None # turn off the SettingWithCopyWarning, which warns of unpredictable results with chained assignments; ref: http://pandas-docs.github.io/pandas-docs-travis/indexing.html#why-does-assignment-fail-when-using-chained-indexing
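#%%
# An alternative to silencing the warning (a sketch, not the original flow):
# take an explicit copy when slicing the feature columns, so the assignments
# below operate on an independent DataFrame and never chain:
# automobile_features = automobile_data[col].copy()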
#%%
automobile_features['horsepower'] = pd.to_numeric(automobile_features['horsepower'])
automobile_features['horsepower'].describe() # now dtype: float64 with cool stats!
#%%
automobile_target = automobile_target.astype(float)
automobile_target['price'].describe()
#%%
automobile_features = pd.get_dummies(automobile_features, columns=['make', 'fuel-type', 'body-style']) # one-hot encoding for non-numeric values
automobile_features.head()
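#%%
# For intuition, get_dummies expands each categorical column into 0/1 indicator
# columns -- a tiny standalone illustration (hypothetical values, not our data):
# pd.get_dummies(pd.DataFrame({'fuel-type': ['gas', 'diesel', 'gas']}))
# ->   fuel-type_diesel  fuel-type_gas
# 0                   0              1
# 1                   1              0
# 2                   0              1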
#%%
automobile_features.columns
#%%
from sklearn import preprocessing
automobile_features[['horsepower']] = preprocessing.scale(automobile_features[['horsepower']]) # standardize the numeric values: subtract the mean, divide by the standard deviation
automobile_features[['horsepower']].head() # most ML algorithms train better when numeric features are on roughly the same scale
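#%%
# For reference, preprocessing.scale is equivalent to standardizing by hand
# (a sketch; scale() uses the population standard deviation, ddof=0):
# hp = automobile_features['horsepower']
# standardized = (hp - hp.mean()) / hp.std(ddof=0)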
#%%
from sklearn.model_selection import train_test_split
X_train, x_test, Y_train, y_test = train_test_split(automobile_features, automobile_target, test_size=0.2, random_state=0) # use 80% of the data for training purposes
#%%
import torch
dtype = torch.float
#%%
X_train_tensor = torch.tensor(X_train.values, dtype=dtype)
x_test_tensor = torch.tensor(x_test.values, dtype=dtype)
X_train_tensor.shape
#%%
Y_train_tensor = torch.tensor(Y_train.values, dtype=dtype)
y_test_tensor = torch.tensor(y_test.values, dtype=dtype)
Y_train_tensor.shape
#%%
inp = X_train_tensor.shape[1] # number of input features (26 for this dataset after one-hot encoding)
out = 1 # a single output value, the price
hid = 100 # hidden layer neurons
loss_fn = torch.nn.MSELoss() # use torch.nn's built-in MSE loss rather than calculating the loss by hand as in the previous example
learning_rate = 0.0001
#%%
model = torch.nn.Sequential(
    torch.nn.Linear(inp, hid), # input layer -> hidden layer
    torch.nn.Sigmoid(),        # the Sigmoid is our choice for the activation function
    torch.nn.Linear(hid, out), # hidden layer -> single price output
)
# Sequential is a model holding NN layers in sequence; all neural network classes
# derive from the base torch.nn.Module class, and any layer can contain a nested module.
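#%%
# For comparison, the same network written as an explicit torch.nn.Module
# subclass -- a sketch of the equivalent class-based style (the name PriceNet
# is ours, not part of the original script):
class PriceNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = torch.nn.Linear(inp, hid)
        self.out = torch.nn.Linear(hid, out)
    def forward(self, x):
        return self.out(torch.sigmoid(self.hidden(x)))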
#%%
for epoch in range(10000): # 10K epochs, or passes through our network
    y_pred = model(X_train_tensor) # apply the model to the input training data
    loss = loss_fn(y_pred, Y_train_tensor)
    if epoch % 1000 == 0:
        print(epoch, loss.item())
    model.zero_grad() # zero out the model gradients before backpropagation
    loss.backward()
    with torch.no_grad(): # don't calculate gradients while updating the model's parameters
        for param in model.parameters(): # access all parameters of the network
            param -= learning_rate * param.grad
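#%%
# The manual update above is a plain gradient-descent step; the idiomatic
# equivalent uses torch.optim.SGD (a sketch, commented out so the model is
# not trained a second time):
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# for epoch in range(10000):
#     y_pred = model(X_train_tensor)
#     loss = loss_fn(y_pred, Y_train_tensor)
#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()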
#%%
# take a sample from our test data and perform a prediction
sample = x_test.iloc[23]
sample
#%%
sample_tensor = torch.tensor(sample.values, dtype=dtype) # convert to tensor
sample_tensor
#%%
y_pred = model(sample_tensor) # pass the sample tensor through our nn model
print("Predicted price of automobile is: ", int(y_pred.item()))
print("Actual price of automobile is: ", int(y_test.iloc[23]))
#%%
# now run predictions on the entire test dataset and plot them
import matplotlib.pyplot as plt # required for the plots; missing from the original imports
if False: # disabled by the author; set to True to run this visualisation
    y_pred_tensor = model(x_test_tensor)
    y_pred = y_pred_tensor.detach().numpy() # detach from the autograd graph for visualisation
    plt.scatter(y_test.values, y_pred)
    plt.xlabel("Actual Price")
    plt.ylabel("Predicted Price")
    plt.show()
#%%
# serialize the model to disk
torch.save(model, 'my_model')
saved_model = torch.load('my_model')
y_pred_tensor = saved_model(x_test_tensor)
y_pred = y_pred_tensor.detach().numpy()
plt.figure(figsize=(15, 6))
plt.plot(y_pred, label='Predicted Price')
plt.plot(y_test.values, label='Actual Price')
plt.legend()
plt.show()
#%%
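#%%
# Note: torch.save(model, ...) pickles the full model object. The commonly
# recommended pattern is to save only the parameters via the state dict
# (a sketch; the file name is ours):
# torch.save(model.state_dict(), 'my_model_state.pt')
# restored = torch.nn.Sequential(torch.nn.Linear(inp, hid), torch.nn.Sigmoid(), torch.nn.Linear(hid, out))
# restored.load_state_dict(torch.load('my_model_state.pt'))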

Comments

Haider Ali

commented on commit 77b72a90d5 on branch master

2 years ago

Why have you kept the test condition as a literal False? Since it is not a variable, it can never become True at runtime. You should add an option for the user to choose whether testing is needed.
