From 58794c713be0fcc8cac9b23fb59a86d9b6e39ee5 Mon Sep 17 00:00:00 2001
From: man with a plan
Date: Wed, 4 Jun 2025 16:46:34 -0700
Subject: [PATCH 1/3] Update hyper params and set seeds

---
 .../reinforcement_q_learning.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
index 0ae3ea9a90c..fdcec8d8801 100644
--- a/intermediate_source/reinforcement_q_learning.py
+++ b/intermediate_source/reinforcement_q_learning.py
@@ -91,6 +91,16 @@
     "cpu"
 )
 
+# set the seeds for reproducibility
+seed = 42
+random.seed(seed)
+torch.manual_seed(seed)
+env.reset(seed=seed)
+env.action_space.seed(seed)
+env.observation_space.seed(seed)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed(seed)
+
 
 ######################################################################
 # Replay Memory
@@ -253,13 +263,14 @@ def forward(self, x):
 # EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay
 # TAU is the update rate of the target network
 # LR is the learning rate of the ``AdamW`` optimizer
+
 BATCH_SIZE = 128
 GAMMA = 0.99
-EPS_START = 0.9
-EPS_END = 0.05
-EPS_DECAY = 1000
+EPS_START = 1
+EPS_END = 0.01
+EPS_DECAY = 2500
 TAU = 0.005
-LR = 1e-4
+LR = 5e-4
 
 # Get number of actions from gym action space
 n_actions = env.action_space.n

From 430f634afcc9a5524391a043ac8d93b891e0a0e6 Mon Sep 17 00:00:00 2001
From: man with a plan
Date: Thu, 5 Jun 2025 10:53:09 -0700
Subject: [PATCH 2/3] Updated hyper params

---
 intermediate_source/reinforcement_q_learning.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
index fdcec8d8801..a0fa9f0bd74 100644
--- a/intermediate_source/reinforcement_q_learning.py
+++ b/intermediate_source/reinforcement_q_learning.py
@@ -266,11 +266,12 @@ def forward(self, x):
 
 BATCH_SIZE = 128
 GAMMA = 0.99
-EPS_START = 1
+EPS_START = 0.9
 EPS_END = 0.01
 EPS_DECAY = 2500
 TAU = 0.005
-LR = 5e-4
+LR = 3e-4
+
 
 # Get number of actions from gym action space
 n_actions = env.action_space.n

From 8677557fa25c44378805f2f2d586e1e079dbcc44 Mon Sep 17 00:00:00 2001
From: man with a plan
Date: Thu, 5 Jun 2025 19:25:38 -0700
Subject: [PATCH 3/3] Added commented paragraph

---
 .../reinforcement_q_learning.py | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
index a0fa9f0bd74..1e50fcb3673 100644
--- a/intermediate_source/reinforcement_q_learning.py
+++ b/intermediate_source/reinforcement_q_learning.py
@@ -91,15 +91,23 @@
     "cpu"
 )
 
-# set the seeds for reproducibility
-seed = 42
-random.seed(seed)
-torch.manual_seed(seed)
-env.reset(seed=seed)
-env.action_space.seed(seed)
-env.observation_space.seed(seed)
-if torch.cuda.is_available():
-    torch.cuda.manual_seed(seed)
+
+# To ensure reproducibility during training, you can fix the random seeds
+# by uncommenting the lines below. This makes the results consistent across
+# runs, which is helpful for debugging or comparing different approaches.
+#
+# That said, allowing randomness can be beneficial in practice, as it lets
+# the model explore different training trajectories.
+
+
+# seed = 42
+# random.seed(seed)
+# torch.manual_seed(seed)
+# env.reset(seed=seed)
+# env.action_space.seed(seed)
+# env.observation_space.seed(seed)
+# if torch.cuda.is_available():
+#     torch.cuda.manual_seed(seed)
 
 
 ######################################################################
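
Note (not part of the patches): the EPS_* changes above affect exploration only through the
epsilon-greedy schedule that the tutorial's select_action already implements; this series does
not touch that code. The standalone sketch below is illustrative, assuming the tutorial's
exponential decay formula; the eps_threshold helper and the step counts printed are made up
for demonstration.

import math

# Hyperparameter values as of PATCH 2/3.
EPS_START = 0.9
EPS_END = 0.01
EPS_DECAY = 2500

def eps_threshold(steps_done):
    # Exponential decay from EPS_START toward EPS_END; a larger EPS_DECAY
    # means epsilon decays more slowly over training steps.
    return EPS_END + (EPS_START - EPS_END) * math.exp(-1.0 * steps_done / EPS_DECAY)

for steps in (0, 1000, 2500, 5000, 10000):
    print(f"steps={steps:>6}  eps={eps_threshold(steps):.3f}")

With EPS_DECAY raised from 1000 to 2500, epsilon takes roughly 2.5x as many steps to fall by
the same fraction, so the agent explores longer before settling toward the lower EPS_END of 0.01.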