import os
import pickle

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import mlflow

from config import *  # paths and hyperparameters (batch_size, learning_rate, ...)

# track this run on the project's DagsHub-hosted MLflow server
mlflow.set_tracking_uri('https://dagshub.com/SHENSHENZYC/next-word-prediction-with-LSTM.mlflow')
# load the pre-embedded training contexts and target word vectors
with open(EMBEDDED_CONTEXT_TRAIN_PATH, 'rb') as f:
    X_train = pickle.load(f)
with open(EMBEDDED_TARGET_TRAIN_PATH, 'rb') as f:
    y_train = pickle.load(f)

# create batches for training data
train_loader = torch.utils.data.DataLoader(dataset=list(zip(X_train, y_train)), batch_size=batch_size, shuffle=True)
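# Each batch yields a (contexts, targets) pair. Assuming the pickled data are
# tensors, contexts should be (batch, CONTEXT_WINDOW, input_size) and targets
# (batch, word_vector_size) to match the model defined below.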
# LSTM model for next-word prediction: encode a window of context word
# vectors and predict the embedding of the next word
class NWP_LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, word_vector_size, num_layers):
        super(NWP_LSTM, self).__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # lstm layer followed by a linear projection into embedding space
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, word_vector_size)

    def forward(self, x):
        # initialize hidden state (short-term memory) and cell state (long-term memory)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)

        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]  # keep only the output at the last time step
        out = self.fc(out)
        return out
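# A quick shape check (illustrative; assumes input_size, hidden_size,
# word_vector_size, num_layers, and CONTEXT_WINDOW all come from config):
#   _m = NWP_LSTM(input_size, hidden_size, word_vector_size, num_layers)
#   _x = torch.randn(2, CONTEXT_WINDOW, input_size)  # (batch, seq_len, features)
#   _m(_x).shape  # -> torch.Size([2, word_vector_size])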
model = NWP_LSTM(input_size, hidden_size, word_vector_size, num_layers)

# Loss and optimizer
criterion = nn.CosineEmbeddingLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
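# With target y = 1, CosineEmbeddingLoss(pred, target, y) equals
# 1 - cos(pred, target), so minimizing it pulls each predicted vector toward
# the direction of the true next-word embedding rather than matching it
# coordinate-wise.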
# log hyperparameters (model.parameters() is a generator, so log the
# trainable-parameter count rather than the raw object)
mlflow.start_run()
mlflow.log_params({'model_class': 'unidirectional LSTM',
                   'trainable parameters': sum(p.numel() for p in model.parameters()),
                   'optimizer': 'adam',
                   'criterion': 'cosine embedding',
                   'epochs': num_epochs,
                   'batch size': batch_size,
                   'learning rate': learning_rate,
                   'hidden layer size': hidden_size,
                   'number of LSTM layers': num_layers,
                   'context window size': CONTEXT_WINDOW})
# Train the model
n_total_steps = len(train_loader)
steps = []
losses = []
for epoch in range(num_epochs):
    for i, (contexts, targets) in enumerate(train_loader):
        # Forward pass; the all-ones target marks every (pred, target) pair as
        # "similar". Use the actual batch size so the last, possibly smaller,
        # batch does not crash the loss.
        preds = model(contexts)
        loss = criterion(preds, targets, torch.ones(contexts.size(0)))

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            # print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
            steps.append(epoch + (i + 1) / n_total_steps)  # fractional epoch for plotting
            losses.append(loss.item())
            mlflow.log_metric(key='training_loss', value=loss.item(), step=epoch * n_total_steps + i)
# test loss on the held-out set
with open(EMBEDDED_CONTEXT_TEST_PATH, 'rb') as f:
    X_test = torch.tensor(pickle.load(f))
with open(EMBEDDED_TARGET_TEST_PATH, 'rb') as f:
    y_test = torch.tensor(pickle.load(f))

# evaluate without tracking gradients
model.eval()
with torch.no_grad():
    test_preds = model(X_test)
    test_loss = criterion(test_preds, y_test, torch.ones(X_test.size(0)))
mlflow.log_metric(key='test loss', value=test_loss.item())
# plot the training loss curve
os.makedirs('plots', exist_ok=True)
plt.plot(steps, losses)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.savefig('plots/train_losses_lstm.png')

# persist the trained model and attach it to the MLflow run
torch.save(model, LSTM_MODEL_PATH)
mlflow.log_artifact(LSTM_MODEL_PATH)
mlflow.end_run()
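# To reuse the model elsewhere (sketch): torch.save above pickles the full
# module, so the NWP_LSTM class must be importable at load time.
#   model = torch.load(LSTM_MODEL_PATH)
#   model.eval()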