1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
import torch.nn as nn
import torch.nn.functional as F
class DQN(nn.Module):
    """Deep Q-network from Mnih et al. (Nature 2015).

    Three convolutional layers followed by two fully connected layers map a
    stack of recent frames to one Q-value per action.
    https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
    """

    def __init__(self, in_channels=4, num_actions=18):
        """
        Arguments:
            in_channels: number of channels of the input, i.e. the number of
                most recent frames stacked together as described in the paper.
            num_actions: number of action-values to output, one-to-one
                correspondence to actions in the game.
        """
        super(DQN, self).__init__()
        # Conv stack assumes 84x84 input so the spatial output is 7x7.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        self.fc4 = nn.Linear(7 * 7 * 64, 512)
        self.fc5 = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return the Q-values, shape (batch, num_actions)."""
        hidden = F.relu(self.conv1(x))
        hidden = F.relu(self.conv2(hidden))
        hidden = F.relu(self.conv3(hidden))
        flat = hidden.view(hidden.size(0), -1)
        hidden = F.relu(self.fc4(flat))
        # Final layer is linear: raw action-values, no activation.
        return self.fc5(hidden)
class DQN_RAM(nn.Module):
    """Small fully connected Q-network for RAM-state inputs / algorithm testing."""

    def __init__(self, in_features=4, num_actions=18):
        """
        Arguments:
            in_features: number of features of the input.
            num_actions: number of action-values to output, one-to-one
                correspondence to actions in the game.
        """
        super(DQN_RAM, self).__init__()
        self.fc1 = nn.Linear(in_features, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, num_actions)

    def forward(self, x):
        """Return the Q-values, shape (batch, num_actions)."""
        out = x
        # ReLU after every hidden layer; the output layer stays linear.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            out = F.relu(hidden_layer(out))
        return self.fc4(out)
class DQN_SEPARABLE(nn.Module):
    """DQN variant built from depthwise separable convolutions.

    Same layer sizes as DQN above, but each classic Conv2d is replaced by a
    depthwise convolution followed by a 1x1 pointwise convolution
    (https://arxiv.org/abs/1704.04861). This yields much more efficient
    models, with an order of magnitude fewer trainable parameters and thus
    cheaper forward and backward passes. Leaky ReLUs replace plain ReLUs to
    prevent "dead" neurons.
    """

    def __init__(self, in_channels=4, num_actions=18):
        super(DQN_SEPARABLE, self).__init__()
        # Depthwise (groups == channels) + pointwise (1x1) pairs.
        self.conv1_depth = nn.Conv2d(in_channels, in_channels, kernel_size=8, stride=4, groups=in_channels)
        self.conv1_point = nn.Conv2d(in_channels, 32, kernel_size=1)
        self.conv2_depth = nn.Conv2d(32, 32, kernel_size=4, stride=2, groups=32)
        self.conv2_point = nn.Conv2d(32, 64, kernel_size=1)
        self.conv3_depth = nn.Conv2d(64, 64, kernel_size=3, stride=1, groups=64)
        self.conv3_point = nn.Conv2d(64, 64, kernel_size=1)
        # 7x7 spatial output assumes an 84x84 input, as in DQN.
        self.fc4 = nn.Linear(7 * 7 * 64, 512)
        self.fc5 = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return the Q-values, shape (batch, num_actions)."""
        out = x
        stages = ((self.conv1_depth, self.conv1_point),
                  (self.conv2_depth, self.conv2_point),
                  (self.conv3_depth, self.conv3_point))
        for depthwise, pointwise in stages:
            out = F.leaky_relu(pointwise(depthwise(out)))
        out = F.leaky_relu(self.fc4(out.view(out.size(0), -1)))
        return self.fc5(out)
class DQN_SEPARABLE_DEEP(nn.Module):
    """Arbitrarily deep depthwise-separable Q-network.

    Similar to DQN_SEPARABLE, but (almost) arbitrarily deep, and actually
    DOES have an order of magnitude fewer weights than DQN (since the final
    linear layer was the actual source of most of the weights, and it is
    smaller here).
    """

    def __init__(self, in_channels=4, num_actions=18, num_layers=18, features=32, in_height=84, in_width=84):
        """
        Arguments:
            in_channels: number of channels of the input frame stack.
            num_actions: number of action-values to output.
            num_layers: total number of separable conv stages (layer 0 plus
                num_layers - 1 identical 3x3 stages).
            features: channel width of every stage after layer 0.
            in_height, in_width: spatial size of the input, used to size fc1.
        """
        super(DQN_SEPARABLE_DEEP, self).__init__()
        self.num_layers = num_layers
        # Layer 0: 5x5 depthwise, stride 2, then 1x1 pointwise up to `features`.
        self.conv0_depth = nn.Conv2d(in_channels, in_channels, kernel_size=5, stride=2, groups=in_channels)
        self.conv0_point = nn.Conv2d(in_channels, features, kernel_size=1)
        # Layers 1..num_layers-1: 3x3 depthwise (stride 1) + 1x1 pointwise.
        for i in range(1, num_layers):
            setattr(self, 'conv{}_depth'.format(i), nn.Conv2d(features, features, kernel_size=3, groups=features))
            setattr(self, 'conv{}_point'.format(i), nn.Conv2d(features, features, kernel_size=1))

        def out_size(in_size):
            # Exact conv arithmetic: layer 0 (kernel 5, stride 2) gives
            # (in_size - 5) // 2 + 1; each later 3x3 stride-1 layer shrinks
            # the size by 2. The previous closed form ((in_size - 4) / 2)
            # dropped the floor of the stride-2 conv for odd in_size (and
            # produced a float), mis-sizing fc1 and crashing forward().
            return (in_size - 5) // 2 + 1 - 2 * (num_layers - 1)

        out_height = out_size(in_height)
        out_width = out_size(in_width)
        if out_height <= 0 or out_width <= 0:
            raise ValueError(
                'num_layers={} is too deep for input {}x{}: spatial output would be {}x{}'.format(
                    num_layers, in_height, in_width, out_height, out_width))
        self.fc1 = nn.Linear(out_height * out_width * features, 512)
        self.fc2 = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return the Q-values, shape (batch, num_actions)."""
        for i in range(self.num_layers):
            conv_depth = getattr(self, 'conv{}_depth'.format(i))
            conv_point = getattr(self, 'conv{}_point'.format(i))
            x = conv_depth(x)
            x = conv_point(x)
            x = F.leaky_relu(x)
        x = F.leaky_relu(self.fc1(x.view(x.size(0), -1)))
        return self.fc2(x)
|