1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
import torch.nn as nn
import torch.nn.functional as F
class DQN(nn.Module):
    """Deep Q-network from Mnih et al. (Nature 2015).

    Three convolutional layers followed by two fully connected layers map a
    stack of recent frames to one Q-value per action.
    https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
    """

    def __init__(self, in_channels=4, num_actions=18):
        """
        Arguments:
            in_channels: number of channels of the input, i.e. the number of
                most recent frames stacked together as described in the paper.
            num_actions: number of action-values to output, one-to-one
                correspondence to actions in the game.
        """
        super(DQN, self).__init__()
        # Conv stack assumes 84x84 input so the spatial output is 7x7.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        self.fc4 = nn.Linear(7 * 7 * 64, 512)
        self.fc5 = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return the Q-values, shape (batch, num_actions)."""
        hidden = F.relu(self.conv1(x))
        hidden = F.relu(self.conv2(hidden))
        hidden = F.relu(self.conv3(hidden))
        flat = hidden.view(hidden.size(0), -1)
        hidden = F.relu(self.fc4(flat))
        # Final layer is linear: raw action-values, no activation.
        return self.fc5(hidden)
class DQN_RAM(nn.Module):
    """Small fully connected Q-network for RAM-state inputs / algorithm testing."""

    def __init__(self, in_features=4, num_actions=18):
        """
        Arguments:
            in_features: number of features of the input.
            num_actions: number of action-values to output, one-to-one
                correspondence to actions in the game.
        """
        super(DQN_RAM, self).__init__()
        self.fc1 = nn.Linear(in_features, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, num_actions)

    def forward(self, x):
        """Return the Q-values, shape (batch, num_actions)."""
        out = x
        # ReLU after every hidden layer; the output layer stays linear.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            out = F.relu(hidden_layer(out))
        return self.fc4(out)
class DQN_SEPARABLE(nn.Module):
    """DQN variant built from depthwise separable convolutions.

    Same layer sizes as DQN above, but each classic Conv2d is replaced by a
    depthwise convolution followed by a 1x1 pointwise convolution
    (https://arxiv.org/abs/1704.04861). This yields much more efficient
    models, with an order of magnitude fewer trainable parameters and thus
    cheaper forward and backward passes. Leaky ReLUs replace plain ReLUs to
    prevent "dead" neurons.
    """

    def __init__(self, in_channels=4, num_actions=18):
        super(DQN_SEPARABLE, self).__init__()
        # Depthwise (groups == channels) + pointwise (1x1) pairs.
        self.conv1_depth = nn.Conv2d(in_channels, in_channels, kernel_size=8, stride=4, groups=in_channels)
        self.conv1_point = nn.Conv2d(in_channels, 32, kernel_size=1)
        self.conv2_depth = nn.Conv2d(32, 32, kernel_size=4, stride=2, groups=32)
        self.conv2_point = nn.Conv2d(32, 64, kernel_size=1)
        self.conv3_depth = nn.Conv2d(64, 64, kernel_size=3, stride=1, groups=64)
        self.conv3_point = nn.Conv2d(64, 64, kernel_size=1)
        # 7x7 spatial output assumes an 84x84 input, as in DQN.
        self.fc4 = nn.Linear(7 * 7 * 64, 512)
        self.fc5 = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return the Q-values, shape (batch, num_actions)."""
        out = x
        stages = ((self.conv1_depth, self.conv1_point),
                  (self.conv2_depth, self.conv2_point),
                  (self.conv3_depth, self.conv3_point))
        for depthwise, pointwise in stages:
            out = F.leaky_relu(pointwise(depthwise(out)))
        out = F.leaky_relu(self.fc4(out.view(out.size(0), -1)))
        return self.fc5(out)
class DQN_SEPARABLE_DEEP(nn.Module):
    """Arbitrarily deep depthwise-separable Q-network.

    Similar to DQN_SEPARABLE, but (almost) arbitrarily deep, and actually
    DOES have an order of magnitude fewer weights than DQN (since the final
    linear layer was the actual source of most of the weights, and it is
    smaller here).
    """

    def __init__(self, in_channels=4, num_actions=18, num_layers=18, features=32, in_height=84, in_width=84):
        """
        Arguments:
            in_channels: number of channels of the input frame stack.
            num_actions: number of action-values to output.
            num_layers: total number of separable conv stages (layer 0 plus
                num_layers - 1 identical 3x3 stages).
            features: channel width of every stage after layer 0.
            in_height, in_width: spatial size of the input, used to size fc1.
        """
        super(DQN_SEPARABLE_DEEP, self).__init__()
        self.num_layers = num_layers
        # Layer 0: 5x5 depthwise, stride 2, then 1x1 pointwise up to `features`.
        self.conv0_depth = nn.Conv2d(in_channels, in_channels, kernel_size=5, stride=2, groups=in_channels)
        self.conv0_point = nn.Conv2d(in_channels, features, kernel_size=1)
        # Layers 1..num_layers-1: 3x3 depthwise (stride 1) + 1x1 pointwise.
        for i in range(1, num_layers):
            setattr(self, 'conv{}_depth'.format(i), nn.Conv2d(features, features, kernel_size=3, groups=features))
            setattr(self, 'conv{}_point'.format(i), nn.Conv2d(features, features, kernel_size=1))

        def out_size(in_size):
            # Exact conv arithmetic: layer 0 (kernel 5, stride 2) gives
            # (in_size - 5) // 2 + 1; each later 3x3 stride-1 layer shrinks
            # the size by 2. The previous closed form ((in_size - 4) / 2)
            # dropped the floor of the stride-2 conv for odd in_size (and
            # produced a float), mis-sizing fc1 and crashing forward().
            return (in_size - 5) // 2 + 1 - 2 * (num_layers - 1)

        out_height = out_size(in_height)
        out_width = out_size(in_width)
        if out_height <= 0 or out_width <= 0:
            raise ValueError(
                'num_layers={} is too deep for input {}x{}: spatial output would be {}x{}'.format(
                    num_layers, in_height, in_width, out_height, out_width))
        self.fc1 = nn.Linear(out_height * out_width * features, 512)
        self.fc2 = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return the Q-values, shape (batch, num_actions)."""
        for i in range(self.num_layers):
            conv_depth = getattr(self, 'conv{}_depth'.format(i))
            conv_point = getattr(self, 'conv{}_point'.format(i))
            x = conv_depth(x)
            x = conv_point(x)
            x = F.leaky_relu(x)
        x = F.leaky_relu(self.fc1(x.view(x.size(0), -1)))
        return self.fc2(x)
|