atari_wrapper.py
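Gym wrappers implementing the standard DeepMind Atari preprocessing (no-op resets, life-loss episodes, frame skipping with max-pooling, 84x84 grayscale frames, and reward clipping), adapted from the Berkeley Deep RL course homework.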

  1. """
  2. This file is copied/apdated from https://github.com/berkeleydeeprlcourse/homework/tree/master/hw3
  3. """
  4. import numpy as np
  5. from collections import deque
  6. import gym
  7. from gym import spaces
  8. from PIL import Image
  9. class NoopResetEnv(gym.Wrapper):
  10. def __init__(self, env=None, noop_max=30):
  11. """Sample initial states by taking random number of no-ops on reset.
  12. No-op is assumed to be action 0.
  13. """
  14. super(NoopResetEnv, self).__init__(env)
  15. self.noop_max = noop_max
  16. assert env.unwrapped.get_action_meanings()[0] == 'NOOP'
  17. def _reset(self):
  18. """ Do no-op action for a number of steps in [1, noop_max]."""
  19. self.env.reset()
  20. noops = np.random.randint(1, self.noop_max + 1)
  21. for _ in range(noops):
  22. obs, _, _, _ = self.env.step(0)
  23. return obs
  24. class FireResetEnv(gym.Wrapper):
  25. def __init__(self, env=None):
  26. """Take action on reset for environments that are fixed until firing."""
  27. super(FireResetEnv, self).__init__(env)
  28. assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
  29. assert len(env.unwrapped.get_action_meanings()) >= 3
  30. def _reset(self):
  31. self.env.reset()
  32. obs, _, _, _ = self.env.step(1)
  33. obs, _, _, _ = self.env.step(2)
  34. return obs
  35. class EpisodicLifeEnv(gym.Wrapper):
  36. def __init__(self, env=None):
  37. """Make end-of-life == end-of-episode, but only reset on true game over.
  38. Done by DeepMind for the DQN and co. since it helps value estimation.
  39. """
  40. super(EpisodicLifeEnv, self).__init__(env)
  41. self.lives = 0
  42. self.was_real_done = True
  43. self.was_real_reset = False
  44. def _step(self, action):
  45. obs, reward, done, info = self.env.step(action)
  46. self.was_real_done = done
  47. # check current lives, make loss of life terminal,
  48. # then update lives to handle bonus lives
  49. lives = self.env.unwrapped.ale.lives()
  50. if lives < self.lives and lives > 0:
  51. # for Qbert somtimes we stay in lives == 0 condtion for a few frames
  52. # so its important to keep lives > 0, so that we only reset once
  53. # the environment advertises done.
  54. done = True
  55. self.lives = lives
  56. return obs, reward, done, info
  57. def _reset(self):
  58. """Reset only when lives are exhausted.
  59. This way all states are still reachable even though lives are episodic,
  60. and the learner need not know about any of this behind-the-scenes.
  61. """
  62. if self.was_real_done:
  63. obs = self.env.reset()
  64. self.was_real_reset = True
  65. else:
  66. # no-op step to advance from terminal/lost life state
  67. obs, _, _, _ = self.env.step(0)
  68. self.was_real_reset = False
  69. self.lives = self.env.unwrapped.ale.lives()
  70. return obs
  71. class MaxAndSkipEnv(gym.Wrapper):
  72. def __init__(self, env=None, skip=4):
  73. """Return only every `skip`-th frame"""
  74. super(MaxAndSkipEnv, self).__init__(env)
  75. # most recent raw observations (for max pooling across time steps)
  76. self._obs_buffer = deque(maxlen=2)
  77. self._skip = skip
  78. def _step(self, action):
  79. total_reward = 0.0
  80. done = None
  81. for _ in range(self._skip):
  82. obs, reward, done, info = self.env.step(action)
  83. self._obs_buffer.append(obs)
  84. total_reward += reward
  85. if done:
  86. break
  87. max_frame = np.max(np.stack(self._obs_buffer), axis=0)
  88. return max_frame, total_reward, done, info
  89. def _reset(self):
  90. """Clear past frame buffer and init. to first obs. from inner env."""
  91. self._obs_buffer.clear()
  92. obs = self.env.reset()
  93. self._obs_buffer.append(obs)
  94. return obs
  95. def _process_frame84(frame):
  96. img = np.reshape(frame, [210, 160, 3]).astype(np.float32)
  97. img = img[:, :, 0] * 0.299 + img[:, :, 1] * 0.587 + img[:, :, 2] * 0.114
  98. img = Image.fromarray(img)
  99. resized_screen = img.resize((84, 110), Image.BILINEAR)
  100. resized_screen = np.array(resized_screen)
  101. x_t = resized_screen[18:102, :]
  102. x_t = np.reshape(x_t, [84, 84, 1])
  103. return x_t.astype(np.uint8)
  104. class ProcessFrame84(gym.Wrapper):
  105. def __init__(self, env=None):
  106. super(ProcessFrame84, self).__init__(env)
  107. self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1))
  108. def _step(self, action):
  109. obs, reward, done, info = self.env.step(action)
  110. return _process_frame84(obs), reward, done, info
  111. def _reset(self):
  112. return _process_frame84(self.env.reset())
  113. class ClippedRewardsWrapper(gym.Wrapper):
  114. def _step(self, action):
  115. obs, reward, done, info = self.env.step(action)
  116. return obs, np.sign(reward), done, info
  117. def wrap_deepmind_ram(env):
  118. env = EpisodicLifeEnv(env)
  119. env = NoopResetEnv(env, noop_max=30)
  120. env = MaxAndSkipEnv(env, skip=4)
  121. if 'FIRE' in env.unwrapped.get_action_meanings():
  122. env = FireResetEnv(env)
  123. env = ClippedRewardsWrapper(env)
  124. return env
  125. def wrap_deepmind(env):
  126. assert 'NoFrameskip' in env.spec.id
  127. env = EpisodicLifeEnv(env)
  128. env = NoopResetEnv(env, noop_max=30)
  129. env = MaxAndSkipEnv(env, skip=4)
  130. if 'FIRE' in env.unwrapped.get_action_meanings():
  131. env = FireResetEnv(env)
  132. env = ProcessFrame84(env)
  133. env = ClippedRewardsWrapper(env)
  134. return env
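
A minimal usage sketch, assuming an old gym release (pre-0.26, where `gym.Wrapper` still dispatches to the `_step`/`_reset` overrides used above and `step` returns a 4-tuple) with the Atari environments installed; the environment id is only an example:

# Hypothetical usage: wrap a NoFrameskip Atari env with the DeepMind-style
# preprocessing defined above.
import gym

env = gym.make('PongNoFrameskip-v4')  # example id; any NoFrameskip Atari env works
env = wrap_deepmind(env)

obs = env.reset()  # uint8 array of shape (84, 84, 1)
obs, reward, done, info = env.step(env.action_space.sample())
print(obs.shape, reward)  # (84, 84, 1), reward clipped to {-1, 0, 1}

Note that `wrap_deepmind` requires a `NoFrameskip` variant because `MaxAndSkipEnv` implements its own 4-frame skip; stacking it on an env that already skips frames would compound the skipping.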