From 58794c713be0fcc8cac9b23fb59a86d9b6e39ee5 Mon Sep 17 00:00:00 2001
From: man with a plan
Date: Wed, 4 Jun 2025 16:46:34 -0700
Subject: [PATCH 1/3] Update hyper params and set seeds

---
 .../reinforcement_q_learning.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
index 0ae3ea9a90c..fdcec8d8801 100644
--- a/intermediate_source/reinforcement_q_learning.py
+++ b/intermediate_source/reinforcement_q_learning.py
@@ -91,6 +91,16 @@
     "cpu"
 )
 
+# set the seeds for reproducibility
+seed = 42
+random.seed(seed)
+torch.manual_seed(seed)
+env.reset(seed=seed)
+env.action_space.seed(seed)
+env.observation_space.seed(seed)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed(seed)
+
 
 ######################################################################
 # Replay Memory
@@ -253,13 +263,14 @@ def forward(self, x):
 # EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay
 # TAU is the update rate of the target network
 # LR is the learning rate of the ``AdamW`` optimizer
+
 BATCH_SIZE = 128
 GAMMA = 0.99
-EPS_START = 0.9
-EPS_END = 0.05
-EPS_DECAY = 1000
+EPS_START = 1
+EPS_END = 0.01
+EPS_DECAY = 2500
 TAU = 0.005
-LR = 1e-4
+LR = 5e-4
 
 # Get number of actions from gym action space
 n_actions = env.action_space.n

From 430f634afcc9a5524391a043ac8d93b891e0a0e6 Mon Sep 17 00:00:00 2001
From: man with a plan
Date: Thu, 5 Jun 2025 10:53:09 -0700
Subject: [PATCH 2/3] Updated hyper params

---
 intermediate_source/reinforcement_q_learning.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
index fdcec8d8801..a0fa9f0bd74 100644
--- a/intermediate_source/reinforcement_q_learning.py
+++ b/intermediate_source/reinforcement_q_learning.py
@@ -266,11 +266,12 @@ def forward(self, x):
 
 BATCH_SIZE = 128
 GAMMA = 0.99
-EPS_START = 1
+EPS_START = 0.9
 EPS_END = 0.01
 EPS_DECAY = 2500
 TAU = 0.005
-LR = 5e-4
+LR = 3e-4
+
 
 # Get number of actions from gym action space
 n_actions = env.action_space.n

From 8677557fa25c44378805f2f2d586e1e079dbcc44 Mon Sep 17 00:00:00 2001
From: man with a plan
Date: Thu, 5 Jun 2025 19:25:38 -0700
Subject: [PATCH 3/3] Added commented paragraph

---
 .../reinforcement_q_learning.py | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
index a0fa9f0bd74..1e50fcb3673 100644
--- a/intermediate_source/reinforcement_q_learning.py
+++ b/intermediate_source/reinforcement_q_learning.py
@@ -91,15 +91,23 @@
     "cpu"
 )
 
-# set the seeds for reproducibility
-seed = 42
-random.seed(seed)
-torch.manual_seed(seed)
-env.reset(seed=seed)
-env.action_space.seed(seed)
-env.observation_space.seed(seed)
-if torch.cuda.is_available():
-    torch.cuda.manual_seed(seed)
+
+# To ensure reproducibility during training, you can fix the random seeds
+# by uncommenting the lines below. This makes the results consistent across
+# runs, which is helpful for debugging or comparing different approaches.
+#
+# That said, allowing randomness can be beneficial in practice, as it lets
+# the model explore different training trajectories.
+
+
+# seed = 42
+# random.seed(seed)
+# torch.manual_seed(seed)
+# env.reset(seed=seed)
+# env.action_space.seed(seed)
+# env.observation_space.seed(seed)
+# if torch.cuda.is_available():
+#     torch.cuda.manual_seed(seed)
 
 
 ######################################################################
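
Note (not part of the patches): the EPS_* changes above affect exploration only through the
epsilon-greedy schedule that the tutorial's select_action already implements; this series does
not touch that code. The standalone sketch below is illustrative, assuming the tutorial's
exponential decay formula; the eps_threshold helper and the step counts printed are made up
for demonstration.

import math

# Hyperparameter values as of PATCH 2/3.
EPS_START = 0.9
EPS_END = 0.01
EPS_DECAY = 2500

def eps_threshold(steps_done):
    # Exponential decay from EPS_START toward EPS_END; a larger EPS_DECAY
    # means epsilon decays more slowly over training steps.
    return EPS_END + (EPS_START - EPS_END) * math.exp(-1.0 * steps_done / EPS_DECAY)

for steps in (0, 1000, 2500, 5000, 10000):
    print(f"steps={steps:>6}  eps={eps_threshold(steps):.3f}")

With EPS_DECAY raised from 1000 to 2500, epsilon takes roughly 2.5x as many steps to fall by
the same fraction, so the agent explores longer before settling toward the lower EPS_END of 0.01.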