|
1 | 1 | import json |
2 | 2 | import re |
3 | 3 | from dataclasses import dataclass |
| 4 | +from datetime import timedelta |
4 | 5 | from pathlib import Path |
5 | 6 |
|
6 | 7 | import pandas as pd |
|
15 | 16 | from rdagent.core.evolving_framework import QueriedKnowledge |
16 | 17 | from rdagent.core.experiment import FBWorkspace, Task |
17 | 18 | from rdagent.log import rdagent_logger as logger |
| 19 | +from rdagent.log.timer import RD_Agent_TIMER_wrapper |
18 | 20 | from rdagent.scenarios.data_science.test_eval import ( |
19 | 21 | MLETestEval, |
20 | 22 | NoTestEvalError, |
@@ -160,14 +162,29 @@ def evaluate( |
160 | 162 | submission_check_out, submission_ret_code = test_eval.valid(self.scen.competition, implementation) |
161 | 163 | stdout += f"\n### Submission check:\n{submission_check_out}\nIf Submission check returns a 'Submission is valid' or similar message, despite some warning messages, you should still consider the submission as valid and give a positive final decision. " |
162 | 164 |
|
| 165 | + # Whether to enable hyperparameter tuning check |
| 166 | + # 1. This is the first loop of evaluation. |
| 167 | + c1 = len(queried_knowledge.task_to_former_failed_traces[target_task.get_task_information()][0]) == 0 |
| 168 | + |
| 169 | +    # 2. The time already spent in the runner, as a ratio of its timeout period, is below the configured limit. |
163 | 170 | time_spent_ratio = implementation.running_info.running_time / env.conf.running_timeout_period |
164 | | - # Only enable hyperparameter tuning on the first evaluation. |
165 | | - # Avoid too much time consuming. |
166 | | - enable_hyperparameter_tuning_check = False |
167 | | - if len(queried_knowledge.task_to_former_failed_traces[target_task.get_task_information()][0]) == 0 and ( |
168 | | - time_spent_ratio < DS_RD_SETTING.time_ratio_limit_to_enable_hyperparameter_tuning |
169 | | - ): |
170 | | - enable_hyperparameter_tuning_check = True |
| 171 | + c2 = time_spent_ratio < DS_RD_SETTING.time_ratio_limit_to_enable_hyperparameter_tuning |
| 172 | + |
| 173 | + # 3. Only enable hyperparameter tuning during the merge stage if configured. |
| 174 | + # TODO: it is not restricted in merge stage now for fast implementation. |
| 175 | + timer = RD_Agent_TIMER_wrapper.timer |
| 176 | + res_time = timer.remain_time() |
| 177 | + if DS_RD_SETTING.only_enable_tuning_in_merge: |
| 178 | + c3 = res_time <= timedelta(hours=DS_RD_SETTING.merge_hours) |
| 179 | + else: |
| 180 | + c3 = True |
| 181 | + |
| 182 | +    # 4. The remaining global time, as a ratio of the total time budget, is at or below the configured limit (i.e. we are late enough in the overall run). |
| 183 | + res_ratio = res_time / timer.all_duration |
| 184 | + c4 = res_ratio <= DS_RD_SETTING.res_time_ratio_limit_to_enable_hyperparameter_tuning |
| 185 | + |
| 186 | + # Only enable hyperparameter tuning check if all conditions are met |
| 187 | + enable_hyperparameter_tuning_check = c1 and c2 and c3 and c4 |
171 | 188 |
|
172 | 189 | system_prompt = T(".prompts:DSCoSTEER_eval.system").r( |
173 | 190 | scenario=self.scen.get_scenario_all_desc(eda_output=implementation.file_dict.get("EDA.md", None)), |
|
0 commit comments