#%%
import pandas as pd
import numpy as np
#%%
# sep=r'\s*,\s*' also swallows stray whitespace around the commas in the raw CSV
automobile_data = pd.read_csv('./data/Automobile_data.csv', sep=r'\s*,\s*', engine='python')
#%%
automobile_data.head()
#%%
automobile_data = automobile_data.replace('?', np.nan)  # the dataset marks missing values with '?'
automobile_data.head()
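#%%
# Optional sanity check (our addition, not in the original flow): count the missing
# values per column so we know how many rows dropna() is about to discard.
automobile_data.isna().sum()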
#%%
automobile_data = automobile_data.dropna()  # drop the rows with missing data
automobile_data.head()
#%%
col = ['make', 'fuel-type', 'body-style', 'horsepower']
automobile_features = automobile_data[col]
automobile_features.head()
#%%
automobile_target = automobile_data[['price']]
automobile_target.head()
#%%
automobile_features['horsepower'].describe()  # dtype: object (because the values are strings)
#%%
# Turn off the SettingWithCopyWarning, which warns that chained assignments can have
# unpredictable results; ref:
# http://pandas-docs.github.io/pandas-docs-travis/indexing.html#why-does-assignment-fail-when-using-chained-indexing
pd.options.mode.chained_assignment = None
#%%
automobile_features['horsepower'] = pd.to_numeric(automobile_features['horsepower'])
automobile_features['horsepower'].describe()  # now dtype: float64, with meaningful summary stats
#%%
automobile_target = automobile_target.astype(float)
automobile_target['price'].describe()
#%%
# One-hot encode the non-numeric columns; dtype=float keeps every column numeric
# so the DataFrame converts cleanly to a tensor later
automobile_features = pd.get_dummies(automobile_features, columns=['make', 'fuel-type', 'body-style'], dtype=float)
automobile_features.head()
#%%
automobile_features.columns
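#%%
# Toy illustration (made-up data, not from the dataset): get_dummies turns each
# category into its own 0/1 column, which is what just happened to 'make',
# 'fuel-type' and 'body-style' above.
pd.get_dummies(pd.DataFrame({'fuel-type': ['gas', 'diesel', 'gas']}), dtype=float)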
#%%
from sklearn import preprocessing
# Standardize the numeric column: subtract the mean, divide by the standard deviation.
# Most ML algorithms train better when numeric features sit in roughly the same range.
automobile_features[['horsepower']] = preprocessing.scale(automobile_features[['horsepower']])
automobile_features[['horsepower']].head()
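#%%
# Quick check (our addition): after scaling, the column should have mean ~0 and
# standard deviation ~1.
print(automobile_features['horsepower'].mean(), automobile_features['horsepower'].std())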
#%%
from sklearn.model_selection import train_test_split
# hold out 20% of the data for testing; random_state=0 makes the split reproducible
X_train, x_test, Y_train, y_test = train_test_split(automobile_features, automobile_target, test_size=0.2, random_state=0)
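#%%
# Sanity check (our addition): the feature and target splits should have matching
# row counts, roughly 80/20.
print(X_train.shape, x_test.shape, Y_train.shape, y_test.shape)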
#%%
import torch
dtype = torch.float
#%%
X_train_tensor = torch.tensor(X_train.values, dtype=dtype)
x_test_tensor = torch.tensor(x_test.values, dtype=dtype)
X_train_tensor.shape
#%%
Y_train_tensor = torch.tensor(Y_train.values, dtype=dtype)
y_test_tensor = torch.tensor(y_test.values, dtype=dtype)
Y_train_tensor.shape
#%%
inp = X_train_tensor.shape[1]  # number of input features (26 for this dataset)
out = 1  # a single output value, the price
hid = 100  # neurons in the hidden layer
loss_fn = torch.nn.MSELoss()  # use torch.nn's built-in MSE loss rather than computing it by hand as in the previous example
learning_rate = 0.0001
#%%
# A Sequential container holds NN layers and applies them in order. Every network
# class derives from the base torch.nn.Module, and any layer can itself contain a
# nested module. Sigmoid is our choice of activation function here.
model = torch.nn.Sequential(
    torch.nn.Linear(inp, hid),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hid, out),
)
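#%%
# The same architecture written as an nn.Module subclass, for comparison (a sketch
# that is not used below; the class and attribute names are our own choices):
class PriceModel(torch.nn.Module):
    def __init__(self, inp, hid, out):
        super().__init__()
        self.fc1 = torch.nn.Linear(inp, hid)
        self.act = torch.nn.Sigmoid()
        self.fc2 = torch.nn.Linear(hid, out)

    def forward(self, x):
        return self.fc2(self.act(self.fc1(x)))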
#%%
for epoch in range(10000):  # 10K epochs, i.e. full passes through the training data
    y_pred = model(X_train_tensor)  # forward pass: apply the model to the training data
    loss = loss_fn(y_pred, Y_train_tensor)
    if epoch % 1000 == 0:
        print(epoch, loss.item())
    model.zero_grad()  # zero out the model gradients before backpropagation
    loss.backward()
    with torch.no_grad():  # don't track gradients while updating the model's parameters
        for param in model.parameters():  # iterates over all parameters of the network
            param -= learning_rate * param.grad
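#%%
# The manual update above is plain SGD done by hand. For reference, the same loop
# using torch.optim (a sketch only; the script relies on the manual loop above, and
# this function is defined but never called):
def train_with_optimizer(model, epochs=10000, lr=0.0001):
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    for epoch in range(epochs):
        y_pred = model(X_train_tensor)
        loss = loss_fn(y_pred, Y_train_tensor)
        optimizer.zero_grad()  # clear the gradients from the previous step
        loss.backward()
        optimizer.step()  # apply the SGD update: param -= lr * param.grad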
#%%
# take a sample from our test data and run a prediction on it
sample = x_test.iloc[23]
sample
#%%
sample_tensor = torch.tensor(sample.values, dtype=dtype)  # convert the sample to a tensor
sample_tensor
#%%
y_pred = model(sample_tensor)  # pass the sample tensor through our NN model
print("Predicted price of automobile is:", int(y_pred.item()))
print("Actual price of automobile is:", int(y_test['price'].iloc[23]))
#%%
# now run predictions on the entire test dataset
import matplotlib.pyplot as plt
y_pred_tensor = model(x_test_tensor)
y_pred = y_pred_tensor.detach().numpy()  # detach from the autograd graph for visualisation
plt.scatter(y_pred, y_test.values)
plt.xlabel("Predicted Price")
plt.ylabel("Actual Price")
plt.show()
#%%
# serialize the whole model to disk, then load it back and check it still predicts
torch.save(model, 'my_model')
saved_model = torch.load('my_model', weights_only=False)  # weights_only=False is needed on PyTorch >= 2.6 to unpickle a full model
y_pred_tensor = saved_model(x_test_tensor)
y_pred = y_pred_tensor.detach().numpy()
plt.figure(figsize=(15, 6))
plt.plot(y_pred, label='Predicted Price')
plt.plot(y_test.values, label='Actual Price')
plt.legend()
plt.show()
#%%
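#%%
# Alternative (our addition, and the pattern PyTorch recommends): save only the
# parameters via state_dict(), since the pickled-model file above is tied to this
# exact class layout. 'my_model_state.pt' is a filename of our own choosing.
torch.save(model.state_dict(), 'my_model_state.pt')
restored = torch.nn.Sequential(torch.nn.Linear(inp, hid), torch.nn.Sigmoid(), torch.nn.Linear(hid, out))
restored.load_state_dict(torch.load('my_model_state.pt'))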