|
1 | 1 | import json |
2 | 2 | import re |
3 | 3 | from dataclasses import dataclass |
| 4 | +from datetime import timedelta |
4 | 5 | from pathlib import Path |
5 | 6 |
|
6 | 7 | import pandas as pd |
|
15 | 16 | from rdagent.core.evolving_framework import QueriedKnowledge |
16 | 17 | from rdagent.core.experiment import FBWorkspace, Task |
17 | 18 | from rdagent.log import rdagent_logger as logger |
| 19 | +from rdagent.log.timer import RD_Agent_TIMER_wrapper |
18 | 20 | from rdagent.scenarios.data_science.test_eval import ( |
19 | 21 | MLETestEval, |
20 | 22 | NoTestEvalError, |
@@ -160,14 +162,29 @@ def evaluate( |
160 | 162 | submission_check_out, submission_ret_code = test_eval.valid(self.scen.competition, implementation) |
161 | 163 | stdout += f"\n### Submission check:\n{submission_check_out}\nIf Submission check returns a 'Submission is valid' or similar message, despite some warning messages, you should still consider the submission as valid and give a positive final decision. " |
162 | 164 |
|
| 165 | + # Whether to enable hyperparameter tuning check |
| 166 | + # 1. This is the first loop of evaluation. |
| 167 | + c1 = len(queried_knowledge.task_to_former_failed_traces[target_task.get_task_information()][0]) == 0 |
| 168 | + |
| 169 | +    # 2. The time already spent in the runner, as a ratio of its timeout period, is below the configured limit. |
163 | 170 | time_spent_ratio = implementation.running_info.running_time / env.conf.running_timeout_period |
164 | | - # Only enable hyperparameter tuning on the first evaluation. |
165 | | - # Avoid too much time consuming. |
166 | | - enable_hyperparameter_tuning_check = False |
167 | | - if len(queried_knowledge.task_to_former_failed_traces[target_task.get_task_information()][0]) == 0 and ( |
168 | | - time_spent_ratio < DS_RD_SETTING.time_ratio_limit_to_enable_hyperparameter_tuning |
169 | | - ): |
170 | | - enable_hyperparameter_tuning_check = True |
| 171 | + c2 = time_spent_ratio < DS_RD_SETTING.time_ratio_limit_to_enable_hyperparameter_tuning |
| 172 | + |
| 173 | + # 3. Only enable hyperparameter tuning during the merge stage if configured. |
| 174 | + # TODO: it is not restricted in merge stage now for fast implementation. |
| 175 | + timer = RD_Agent_TIMER_wrapper.timer |
| 176 | + res_time = timer.remain_time() |
| 177 | + if DS_RD_SETTING.only_enable_tuning_in_merge: |
| 178 | + c3 = res_time <= timedelta(hours=DS_RD_SETTING.merge_hours) |
| 179 | + else: |
| 180 | + c3 = True |
| 181 | + |
| 182 | +    # 4. The remaining global time, as a ratio of the total time budget, is at or below the configured limit (i.e. we are late enough in the overall run). |
| 183 | + res_ratio = res_time / timer.all_duration |
| 184 | + c4 = res_ratio <= DS_RD_SETTING.res_time_ratio_limit_to_enable_hyperparameter_tuning |
| 185 | + |
| 186 | + # Only enable hyperparameter tuning check if all conditions are met |
| 187 | + enable_hyperparameter_tuning_check = c1 and c2 and c3 and c4 |
171 | 188 |
|
172 | 189 | system_prompt = T(".prompts:DSCoSTEER_eval.system").r( |
173 | 190 | scenario=self.scen.get_scenario_all_desc(eda_output=implementation.file_dict.get("EDA.md", None)), |
|
0 commit comments