Commit 6af4a5b

Migrate to mujoco-py 1.50 (#834)
* all envs run offscreen
* render works
* changed mujoco-py version
* Bump versions
* Update version and README
* Same versioning for all mujoco envs
* Fix typo
* Fix version
* Bump version again
* Revert "Fix version"
  This reverts commit decc577.
1 parent 0c91364 commit 6af4a5b

18 files changed (+89 lines, -102 lines)

README.rst

Lines changed: 6 additions & 2 deletions

@@ -4,7 +4,7 @@ OpenAI Gym
 **OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to a standardized set of environments.

 .. image:: https://travis-ci.org/openai/gym.svg?branch=master
-    :target: https://travis-ci.org/openai/gym
+    :target: https://travis-ci.org/openai/gym

 `See What's New section below <#what-s-new>`_

@@ -126,7 +126,7 @@ fake display. The easiest way to do this is by running under
 .. code:: shell

-    xvfb-run -s "-screen 0 1400x900x24" bash
+    xvfb-run -s "-screen 0 1400x900x24" bash

 Installing dependencies for specific environments
 -------------------------------------------------

@@ -261,6 +261,10 @@ We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
 What's new
 ==========

+- 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
+  Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
+  should be similar (see https://github.com/openai/gym/pull/834) but there are likely
+  some differences due to changes in MuJoCo.
 - 2017-06-16: Make env.spec into a property to fix a bug that occurs
   when you try to print out an unregistered Env.
 - 2017-05-13: BACKWARDS INCOMPATIBILITY: The Atari environments are now at
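
A minimal usage sketch of the renamed environments described in the new changelog entry (not part of the commit; assumes gym with mujoco_py >= 1.50 is installed, and the rollout length is illustrative):

    import gym

    env = gym.make('HalfCheetah-v2')   # code before this commit would have used 'HalfCheetah-v1'
    obs = env.reset()
    for _ in range(100):
        # random actions; in the gym API of this era, step() returns a 4-tuple
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()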

gym/envs/__init__.py

Lines changed: 13 additions & 13 deletions

@@ -204,89 +204,89 @@
 # 2D

 register(
-    id='Reacher-v1',
+    id='Reacher-v2',
     entry_point='gym.envs.mujoco:ReacherEnv',
     max_episode_steps=50,
     reward_threshold=-3.75,
 )

 register(
-    id='Pusher-v0',
+    id='Pusher-v2',
     entry_point='gym.envs.mujoco:PusherEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='Thrower-v0',
+    id='Thrower-v2',
     entry_point='gym.envs.mujoco:ThrowerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='Striker-v0',
+    id='Striker-v2',
     entry_point='gym.envs.mujoco:StrikerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='InvertedPendulum-v1',
+    id='InvertedPendulum-v2',
     entry_point='gym.envs.mujoco:InvertedPendulumEnv',
     max_episode_steps=1000,
     reward_threshold=950.0,
 )

 register(
-    id='InvertedDoublePendulum-v1',
+    id='InvertedDoublePendulum-v2',
     entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
     max_episode_steps=1000,
     reward_threshold=9100.0,
 )

 register(
-    id='HalfCheetah-v1',
+    id='HalfCheetah-v2',
     entry_point='gym.envs.mujoco:HalfCheetahEnv',
     max_episode_steps=1000,
     reward_threshold=4800.0,
 )

 register(
-    id='Hopper-v1',
+    id='Hopper-v2',
     entry_point='gym.envs.mujoco:HopperEnv',
     max_episode_steps=1000,
     reward_threshold=3800.0,
 )

 register(
-    id='Swimmer-v1',
+    id='Swimmer-v2',
     entry_point='gym.envs.mujoco:SwimmerEnv',
     max_episode_steps=1000,
     reward_threshold=360.0,
 )

 register(
-    id='Walker2d-v1',
+    id='Walker2d-v2',
     max_episode_steps=1000,
     entry_point='gym.envs.mujoco:Walker2dEnv',
 )

 register(
-    id='Ant-v1',
+    id='Ant-v2',
     entry_point='gym.envs.mujoco:AntEnv',
     max_episode_steps=1000,
     reward_threshold=6000.0,
 )

 register(
-    id='Humanoid-v1',
+    id='Humanoid-v2',
     entry_point='gym.envs.mujoco:HumanoidEnv',
     max_episode_steps=1000,
 )

 register(
-    id='HumanoidStandup-v1',
+    id='HumanoidStandup-v2',
     entry_point='gym.envs.mujoco:HumanoidStandupEnv',
     max_episode_steps=1000,
 )
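
For context, the registrations above change only the `id` string; a hypothetical registration following the same pattern might look like the sketch below (`MyCheetah-v2` and `my_package.envs:MyCheetahEnv` are made-up names for illustration):

    from gym.envs.registration import register

    register(
        id='MyCheetah-v2',                           # the trailing -v2 encodes the env revision
        entry_point='my_package.envs:MyCheetahEnv',  # hypothetical module:class path
        max_episode_steps=1000,
        reward_threshold=4800.0,
    )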

gym/envs/mujoco/ant.py

Lines changed: 4 additions & 4 deletions

@@ -14,7 +14,7 @@ def _step(self, a):
         forward_reward = (xposafter - xposbefore)/self.dt
         ctrl_cost = .5 * np.square(a).sum()
         contact_cost = 0.5 * 1e-3 * np.sum(
-            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
         survive_reward = 1.0
         reward = forward_reward - ctrl_cost - contact_cost + survive_reward
         state = self.state_vector()

@@ -30,9 +30,9 @@ def _step(self, a):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
-            np.clip(self.model.data.cfrc_ext, -1, 1).flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
+            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
         ])

     def reset_model(self):
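
The pattern in this file repeats across the rest of the commit: simulation state moves from `self.model.data` to `self.sim.data`. A rough standalone sketch of the mujoco-py 1.50 objects involved, assuming an `ant.xml` model file is available locally:

    import mujoco_py

    model = mujoco_py.load_model_from_path('ant.xml')  # static model description
    sim = mujoco_py.MjSim(model)                        # simulation state lives here in 1.50
    sim.step()

    # pre-1.50 code read state as model.data.cfrc_ext; in 1.50 it is:
    contact_forces = sim.data.cfrc_ext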

gym/envs/mujoco/half_cheetah.py

Lines changed: 4 additions & 4 deletions

@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, action):
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
         self.do_simulation(action, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         ob = self._get_obs()
         reward_ctrl = - 0.1 * np.square(action).sum()
         reward_run = (xposafter - xposbefore)/self.dt

@@ -20,8 +20,8 @@ def _step(self, action):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[1:],
+            self.sim.data.qvel.flat,
         ])

     def reset_model(self):
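
Besides the `model.data` to `sim.data` rename, the hunk above also drops the second index on `qpos`: in mujoco-py 1.50 the state buffers are flat 1-D arrays rather than (n, 1) columns. A small NumPy-only illustration (shapes are assumptions for a generic model):

    import numpy as np

    qpos_old = np.zeros((9, 1))   # pre-1.50 layout: column vector, indexed qpos[0, 0]
    qpos_new = np.zeros(9)        # 1.50 layout: flat array, indexed qpos[0]

    x_before_old = qpos_old[0, 0]
    x_before_new = qpos_new[0]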

gym/envs/mujoco/hopper.py

Lines changed: 4 additions & 4 deletions

@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = (posafter - posbefore) / self.dt
         reward += alive_bonus

@@ -23,8 +23,8 @@ def _step(self, a):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            np.clip(self.model.data.qvel.flat, -10, 10)
+            self.sim.data.qpos.flat[1:],
+            np.clip(self.sim.data.qvel.flat, -10, 10)
         ])

     def reset_model(self):

gym/envs/mujoco/humanoid.py

Lines changed: 8 additions & 8 deletions

@@ -2,9 +2,9 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
+def mass_center(model, sim):
+    mass = np.expand_dims(model.body_mass, 1)
+    xpos = sim.data.xipos
     return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

 class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):

@@ -13,7 +13,7 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,

@@ -22,17 +22,17 @@ def _get_obs(self):
                                data.cfrc_ext.flat])

     def _step(self, a):
-        pos_before = mass_center(self.model)
+        pos_before = mass_center(self.model, self.sim)
         self.do_simulation(a, self.frame_skip)
-        pos_after = mass_center(self.model)
+        pos_after = mass_center(self.model, self.sim)
         alive_bonus = 5.0
-        data = self.model.data
+        data = self.sim.data
         lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
         quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
         quad_impact_cost = min(quad_impact_cost, 10)
         reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
-        qpos = self.model.data.qpos
+        qpos = self.sim.data.qpos
         done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
         return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
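
The new `mass_center(model, sim)` is a mass-weighted average of the body frame positions, returning the x coordinate; `np.expand_dims` is needed because `body_mass` is 1-D in mujoco-py 1.50 while `xipos` is (nbody, 3). A standalone sketch with made-up numbers:

    import numpy as np

    body_mass = np.array([1.0, 2.0, 3.0])      # stand-in for model.body_mass, shape (nbody,)
    xipos = np.array([[0.0, 0.0, 1.0],
                      [1.0, 0.0, 1.0],
                      [2.0, 0.0, 1.0]])        # stand-in for sim.data.xipos, shape (nbody, 3)

    mass = np.expand_dims(body_mass, 1)        # shape (nbody, 1) so it broadcasts over xyz
    center_x = (np.sum(mass * xipos, 0) / np.sum(mass))[0]
    print(center_x)                            # 1.333...: x coordinate of the center of mass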

gym/envs/mujoco/humanoidstandup.py

Lines changed: 3 additions & 8 deletions

@@ -2,18 +2,13 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
-    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
-
 class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
         mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
         utils.EzPickle.__init__(self)

     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,

@@ -23,8 +18,8 @@ def _get_obs(self):

     def _step(self, a):
         self.do_simulation(a, self.frame_skip)
-        pos_after = self.model.data.qpos[2][0]
-        data = self.model.data
+        pos_after = self.sim.data.qpos[2]
+        data = self.sim.data
         uph_cost = (pos_after - 0) / self.model.opt.timestep

         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()

gym/envs/mujoco/inverted_double_pendulum.py

Lines changed: 8 additions & 8 deletions

@@ -11,22 +11,22 @@ def __init__(self):
     def _step(self, action):
         self.do_simulation(action, self.frame_skip)
         ob = self._get_obs()
-        x, _, y = self.model.data.site_xpos[0]
+        x, _, y = self.sim.data.site_xpos[0]
         dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
-        v1, v2 = self.model.data.qvel[1:3]
+        v1, v2 = self.sim.data.qvel[1:3]
         vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
         alive_bonus = 10
-        r = (alive_bonus - dist_penalty - vel_penalty)[0]
+        r = alive_bonus - dist_penalty - vel_penalty
         done = bool(y <= 1)
         return ob, r, done, {}

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos[:1],  # cart x pos
-            np.sin(self.model.data.qpos[1:]),  # link angles
-            np.cos(self.model.data.qpos[1:]),
-            np.clip(self.model.data.qvel, -10, 10),
-            np.clip(self.model.data.qfrc_constraint, -10, 10)
+            self.sim.data.qpos[:1],  # cart x pos
+            np.sin(self.sim.data.qpos[1:]),  # link angles
+            np.cos(self.sim.data.qpos[1:]),
+            np.clip(self.sim.data.qvel, -10, 10),
+            np.clip(self.sim.data.qfrc_constraint, -10, 10)
         ]).ravel()

     def reset_model(self):
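
The reward change above appears to follow from the same flattening: `qvel` is 1-D in mujoco-py 1.50, so `v1` and `v2` unpack as scalars and the trailing `[0]` on the reward is no longer needed. A scalar walk-through of the reward with made-up state values:

    x, y = 0.05, 1.9            # tip site position (made-up values)
    v1, v2 = 0.3, -0.2          # joint velocities (made-up values)

    dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
    vel_penalty = 1e-3 * v1 ** 2 + 5e-3 * v2 ** 2
    alive_bonus = 10
    r = alive_bonus - dist_penalty - vel_penalty   # plain float; the old code had to index [0]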

gym/envs/mujoco/inverted_pendulum.py

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ def reset_model(self):
         return self._get_obs()

     def _get_obs(self):
-        return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

     def viewer_setup(self):
         v = self.viewer
