feat: multi-trace online merge (#886)

xuangu-fang · you-n-g · web-flow · commit 2112d676d093 · 2025-05-19T17:59:42.000+08:00
* prompt: highlight overfitting risk in AutoSOTAexpSelector * set online merge time in conf * online multi-trace merge with time-limit policy * fix typo * feat: allow soft-knowledge-base + multi_trace * fix: improve file tree and _walk symlink handling (#877) * refactor: improve file tree and _walk symlink handling * remove unused code * lint * prompt: highlight overfitting risk in AutoSOTAexpSelector * set online merge time in conf * online multi-trace merge with time-limit policy * fix typo * feat: allow soft-knowledge-base + multi_trace * auto-lint * put the multi-trace related config together --------- Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
diff --git a/rdagent/app/data_science/conf.py b/rdagent/app/data_science/conf.py
@@ -51,20 +51,6 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     enable_doc_dev: bool = False
     model_dump_check_level: Literal["medium", "high"] = "medium"
 
-    ### selector related
-
-    #### checkpoint selector related
-    # selector_name: str = "latest"
-    selector_name: str = "rdagent.scenarios.data_science.proposal.exp_gen.ckp_select.LatestCKPSelector"
-    """The name of the selector to use"""
-    sota_count_window: int = 5
-    """The number of trials to consider for SOTA count"""
-    sota_count_threshold: int = 1
-    """The threshold for SOTA count"""
-
-    #### SOTA experiment selector related
-    sota_exp_selector_name: str = "rdagent.scenarios.data_science.proposal.exp_gen.sota_exp_select.GlobalSOTASelector"
-    """The name of the SOTA experiment selector to use"""
     ### knowledge base
     enable_knowledge_base: bool = False
     knowledge_base_version: str = "v1"
@@ -83,8 +69,34 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     """We'll use f"{DS_RD_SETTING.local_data_path}/{DS_RD_SETTING.eval_sub_dir}/{competition}"
     to find the scriipt to evaluate the submission on test"""
 
-    ### inject diverse
+    """---below are the settings for multi-trace---"""
+
+    ### multi-trace related
+    max_trace_num: int = 3
+    """The maximum number of traces to grow before merging"""
+
+    #### multi-trace:checkpoint selector
+    selector_name: str = "rdagent.scenarios.data_science.proposal.exp_gen.ckp_select.LatestCKPSelector"
+    """The name of the selector to use"""
+    sota_count_window: int = 5
+    """The number of trials to consider for SOTA count"""
+    sota_count_threshold: int = 1
+    """The threshold for SOTA count"""
+
+    #### multi-trace: SOTA experiment selector
+    sota_exp_selector_name: str = "rdagent.scenarios.data_science.proposal.exp_gen.sota_exp_select.GlobalSOTASelector"
+    """The name of the SOTA experiment selector to use"""
+
+    ### multi-trace:inject optimals for multi-trace
+    # inject diverse when start a new sub-trace
     enable_inject_diverse: bool = False
 
+    # inject knowledge (enable the knowledge base) at the root of the trace
+    enable_inject_knowledge_at_root: bool = False
+
+    #### multi-trace: time for final multi-trace merge
+    merge_hours: int = 2
+    """The time for merge"""
+
 
 DS_RD_SETTING = DataScienceBasePropSetting()
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/ckp_select.py b/rdagent/scenarios/data_science/proposal/exp_gen/ckp_select.py
@@ -1,8 +1,10 @@
 import random
+from datetime import datetime, timedelta
 
 from rdagent.app.data_science.conf import DS_RD_SETTING
 from rdagent.core.proposal import CheckpointSelector, Trace
 from rdagent.log import rdagent_logger as logger
+from rdagent.log.timer import RD_Agent_TIMER_wrapper, RDAgentTimer
 
 # # TODO: more advanced selector
 # # TODO/Discussion: load selector function here or define selector class in `proposal.py`?
@@ -23,6 +25,80 @@ def get_selection(self, trace: Trace) -> tuple[int, ...]:
         return (-1,)
 
 
+class LimitTimeCKPSelector(CheckpointSelector):
+    """
+    Record the start time of the current sub-trace, and jump to a new sub-trace once its time is up.
+
+    The per-trace time limit is the total timer duration minus the configured merge hours,
+    divided by the configured maximum number of traces.
+    """
+
+    def __init__(self):
+        self.global_timer: RDAgentTimer = RD_Agent_TIMER_wrapper.timer
+        # sub-trace index -> datetime at which that sub-trace was started
+        self.sub_trace_start_times = {}
+        self.MAX_TRACE_NUM = DS_RD_SETTING.max_trace_num
+        # filled in lazily by `set_time_limit` on the first `get_selection` call
+        self.time_limit_pre_trace = None
+
+    def set_time_limit(self):
+        """Compute and store the time limit of a single sub-trace."""
+        total_seconds = self.global_timer.all_duration.total_seconds()
+        merge_seconds = timedelta(hours=DS_RD_SETTING.merge_hours).total_seconds()
+        # the merge hours are excluded before the rest is shared between the traces
+        seconds_per_trace = (total_seconds - merge_seconds) / DS_RD_SETTING.max_trace_num
+        self.time_limit_pre_trace = timedelta(seconds=seconds_per_trace)
+        logger.info(f"Using limit time selector with time limit {self.time_limit_pre_trace} per trace")
+
+    def get_selection(self, trace: Trace) -> tuple[int, ...]:
+        """
+        Decide between continuing the current sub-trace and opening a new one,
+        based on the time already spent in the current sub-trace.
+
+        `trace.sub_trace_count` is updated in place whenever a sub-trace is started.
+
+        Returns:
+            (-1,): Continue with the current latest trial
+            (): Start a new sub-trace if max trace limit not reached
+        """
+        if self.time_limit_pre_trace is None:
+            self.set_time_limit()
+
+        now = datetime.now()
+
+        # Empty history: register the very first sub-trace.
+        if len(trace.hist) == 0:
+            trace.sub_trace_count = 0
+            self.sub_trace_start_times[trace.sub_trace_count] = now
+            logger.info(f"Starting initial sub-trace {trace.sub_trace_count} at {now}")
+            return (-1,)
+
+        time_spent = now - self.sub_trace_start_times[trace.sub_trace_count]
+
+        # Still inside the time limit: keep growing the current sub-trace.
+        if time_spent < self.time_limit_pre_trace:
+            logger.info(
+                f"Elapsed time {time_spent} is below time limit {self.time_limit_pre_trace}, continue the current sub-trace"
+            )
+            logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+            return (-1,)
+
+        # Time is up, but no further sub-trace may be opened.
+        if trace.sub_trace_count + 1 >= self.MAX_TRACE_NUM:
+            logger.info(f"Reached maximum trace count ({self.MAX_TRACE_NUM}), continuing with the current sub-trace")
+            logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+            return (-1,)
+
+        # Time is up and another sub-trace is allowed: open it and remember when it started.
+        trace.sub_trace_count += 1
+        self.sub_trace_start_times[trace.sub_trace_count] = now
+        logger.info(
+            f"Elapsed time {time_spent} exceeds time limit {self.time_limit_pre_trace}, jump to a new sub-trace"
+        )
+        logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+        return ()  # Empty tuple signals starting a new sub-trace
+
+
 class SOTAJumpCKPSelector(CheckpointSelector):
     """
     SOTA jump policy:
@@ -35,13 +111,13 @@ def __init__(
     ) -> None:
         self.SOTA_COUNT_WINDOW = DS_RD_SETTING.sota_count_window
         self.SOTA_COUNT_THRESHOLD = DS_RD_SETTING.sota_count_threshold
+        self.MAX_TRACE_NUM = DS_RD_SETTING.max_trace_num
 
         logger.info(
             f"Using SOTA-jump selector with window {self.SOTA_COUNT_WINDOW} and threshold {self.SOTA_COUNT_THRESHOLD}"
         )
 
     def get_selection(self, trace: Trace) -> tuple[int, ...]:
-
         current_trace = trace.retrieve_search_list(search_type="ancestors")
         if len(trace.hist) > 0 and len(current_trace) > self.SOTA_COUNT_WINDOW:
             all_exp_list = trace.experiment_and_feedback_list_after_init(return_type="all", search_type="ancestors")
@@ -54,6 +130,14 @@ def get_selection(self, trace: Trace) -> tuple[int, ...]:
                 if fb.decision:
                     sota_count += 1
             if sota_count < self.SOTA_COUNT_THRESHOLD:
+                # Check if we've reached the maximum number of traces
+                if trace.sub_trace_count + 1 >= self.MAX_TRACE_NUM:
+                    logger.info(
+                        f"Reached maximum trace count ({self.MAX_TRACE_NUM}), continuing with the current sub-trace"
+                    )
+                    logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+                    return (-1,)
+
                 trace.sub_trace_count += 1
                 logger.info(
                     f"SOTA count {sota_count} is below threshold {self.SOTA_COUNT_THRESHOLD}, jump to a new sub-trace"
@@ -85,6 +169,7 @@ def __init__(
     ) -> None:
         self.SOTA_COUNT_WINDOW = DS_RD_SETTING.sota_count_window
         self.SOTA_COUNT_THRESHOLD = DS_RD_SETTING.sota_count_threshold
+        self.MAX_TRACE_NUM = DS_RD_SETTING.max_trace_num
 
         logger.info(
             f"Using back-jump selector with window {self.SOTA_COUNT_WINDOW} and threshold {self.SOTA_COUNT_THRESHOLD}"
@@ -106,6 +191,13 @@ def get_selection(self, trace: Trace) -> tuple[int, ...]:
                     sota_count += 1
 
             if sota_count < self.SOTA_COUNT_THRESHOLD:
+                # Check if we've reached the maximum number of traces before creating a new one
+                if trace.sub_trace_count + 1 >= self.MAX_TRACE_NUM:
+                    logger.info(
+                        f"Reached maximum trace count ({self.MAX_TRACE_NUM}), continuing with the current sub-trace"
+                    )
+                    logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+                    return (-1,)
 
                 random_choice = random.random()
                 if random_choice < 0.5:
@@ -127,6 +219,14 @@ def get_selection(self, trace: Trace) -> tuple[int, ...]:
                         logger.info(f"current sub-trace count: {trace.sub_trace_count}")
                         return (last_second_sota_idx,)
                     else:
+                        # Check max trace limit again before creating a new trace
+                        if trace.sub_trace_count + 1 >= self.MAX_TRACE_NUM:
+                            logger.info(
+                                f"Reached maximum trace count ({self.MAX_TRACE_NUM}), continuing with the current sub-trace"
+                            )
+                            logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+                            return (-1,)
+
                         trace.sub_trace_count += 1
                         logger.info(
                             f"SOTA count {sota_count} is below threshold {self.SOTA_COUNT_THRESHOLD}, jump a new sub-trace"
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/merge.py b/rdagent/scenarios/data_science/proposal/exp_gen/merge.py
@@ -82,6 +82,7 @@ def gen(self, trace: DSTrace, selection: tuple[int, ...] = (-1,)) -> DSExperimen
         return exp
 
 
+# dual-target version
 class ExpGen2TraceAndMerge(ExpGen):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -92,7 +93,7 @@ def gen(self, trace: DSTrace, selection: tuple[int, ...] = (-1,)) -> DSExperimen
         timer: RDAgentTimer = RD_Agent_TIMER_wrapper.timer
         logger.info(f"Remain time: {timer.remain_time_duration}")
 
-        if timer.remain_time_duration >= timedelta(hours=2):
+        if timer.remain_time_duration >= timedelta(hours=DS_RD_SETTING.merge_hours):
             leaves: list[int] = trace.get_leaves()
             if len(leaves) < 2:
                 selection = tuple()  # create new trace
@@ -111,3 +112,124 @@ def gen(self, trace: DSTrace, selection: tuple[int, ...] = (-1,)) -> DSExperimen
                 return self.exp_gen.gen(trace, selection)
             else:
                 return self.merge_exp_gen.gen(trace, selection)
+
+
+class MergeExpGen_MultiTrace(ExpGen):
+    """Generate an experiment that merges the solutions found at the leaves of a multi-trace run.
+
+    The first leaf provides the main solution; every other leaf contributes one
+    (solution description, feedback description) pair to the `multi_trace` prompt.
+    """
+
+    def gen(self, trace: DSTrace, selection: tuple[int, ...] = (-1,)) -> DSExperiment:
+        """Build the merge experiment.
+
+        Args:
+            trace: the trace whose leaves are merged.
+            selection: only forwarded to `trace.set_current_selection`; the solutions to
+                merge are taken from `trace.get_leaves()`, not from this argument.
+
+        Returns:
+            A `DSExperiment` with a single pipeline task carrying the merge prompt, with the
+            code of the main solution's workspace injected into its workspace.
+        """
+        # The solutions to merge come from all leaves, not from `selection`.
+        leaves: list[int] = trace.get_leaves()
+        trace.set_current_selection(selection)
+
+        # The first leaf is taken as the main solution the others are merged into.
+        sota_exp_fb = trace.sota_experiment_fb(selection=(leaves[0],))
+        if sota_exp_fb is None:
+            # no SOTA experiment found for this leaf: fall back to the leaf entry itself
+            sota_exp_fb = trace.hist[leaves[0]]
+
+        sota_exp_desc = T("scenarios.data_science.share:describe.exp").r(
+            exp=sota_exp_fb[0],
+            heading="Best previous exploration of the scenario",
+        )
+        sota_exp_fb_desc = T("scenarios.data_science.share:describe.feedback").r(
+            exp_and_feedback=sota_exp_fb,
+            heading="The feedback for best previous exploration",
+        )
+
+        exp_fb_desc_to_merge_list = []
+        # Describe the best experiment of every remaining leaf.
+        for leaf in leaves[1:]:
+            exp_to_merge_fb = trace.sota_experiment_fb(selection=(leaf,))
+            if exp_to_merge_fb is None:
+                exp_to_merge_fb = trace.hist[leaf]
+
+            exp_to_merge_desc = T("scenarios.data_science.share:describe.exp").r(
+                exp=exp_to_merge_fb[0],
+                heading="A solution that to be merged into previous best solution",
+            )
+
+            success_fb_list = trace.experiment_and_feedback_list_after_init(
+                return_type="sota", search_type="ancestors", selection=(leaf,)
+            )
+            if len(success_fb_list) > 0:
+                exp_to_merge_fb_desc = T("scenarios.data_science.share:describe.trace").r(
+                    exp_and_feedback_list=success_fb_list,
+                    type="success",
+                    heading="Successful iterations:",
+                    success_trial_desc="These trials are the steps or changes that led to the success of the solution to be merged",
+                    pipeline=DS_RD_SETTING.coder_on_whole_pipeline,
+                )
+            else:
+                exp_to_merge_fb_desc = T("scenarios.data_science.share:describe.feedback").r(
+                    exp_and_feedback=exp_to_merge_fb,
+                    heading="The feedback for the solution to be merged",
+                )
+
+            # Collect inside the loop so that every leaf reaches the prompt; appending after
+            # the loop would keep only the last leaf's description.
+            exp_fb_desc_to_merge_list.append((exp_to_merge_desc, exp_to_merge_fb_desc))
+
+        task = PipelineTask(
+            description=T("scenarios.data_science.proposal.exp_gen.merge:multi_trace").r(
+                sota_exp_desc=sota_exp_desc,
+                sota_exp_fb_desc=sota_exp_fb_desc,
+                exp_fb_desc_to_merge_list=exp_fb_desc_to_merge_list,
+            )
+        )
+
+        exp = DSExperiment(
+            pending_tasks_list=[[task]],
+            hypothesis=DSHypothesis(
+                component="Pipeline",
+                hypothesis="Merging two different versions of solutions would get the best of both sides and result in a better solution",
+            ),
+        )
+
+        if sota_exp_fb is not None:
+            # start the merged experiment from the main solution's code
+            exp.experiment_workspace.inject_code_from_file_dict(sota_exp_fb[0].experiment_workspace)
+        return exp
+
+
+# multi-target version
+# allow multiple traces to grow and then merge
+class ExpGen2TraceAndMergeV2(ExpGen):
+    """Grow traces with the regular generator, then merge them once the merge hours are reached.
+
+    While the remaining time is at least `DS_RD_SETTING.merge_hours`, proposals come from
+    `DSExpGen`. Afterwards a single merge experiment is generated (when at least two leaves
+    exist) and later calls continue from the latest trial.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.merge_exp_gen = MergeExpGen_MultiTrace(self.scen)
+        self.exp_gen = DSExpGen(self.scen)
+        self.MAX_TRACE_NUM = DS_RD_SETTING.max_trace_num  # maximum number of traces to grow before merging
+        # becomes True once the merge experiment has been generated
+        self.flag_start_merge = False
+
+    def gen(self, trace: DSTrace, selection: tuple[int, ...] = (-1,)) -> DSExperiment:
+        """Return the next experiment, switching to the merge stage when time runs short.
+
+        Note that this method writes to the shared `DS_RD_SETTING` object.
+        """
+        timer: RDAgentTimer = RD_Agent_TIMER_wrapper.timer
+        logger.info(f"Remain time: {timer.remain_time_duration}")
+
+        # Growing stage: enough time is left, keep exploring with the regular generator.
+        if timer.remain_time_duration >= timedelta(hours=DS_RD_SETTING.merge_hours):
+            if DS_RD_SETTING.enable_inject_knowledge_at_root:
+                # knowledge base is switched on while the history is empty, off afterwards
+                DS_RD_SETTING.enable_knowledge_base = len(trace.hist) == 0
+            return self.exp_gen.gen(trace, selection)
+
+        # Merging stage: disable reset.
+        DS_RD_SETTING.coding_fail_reanalyze_threshold = 100000
+        DS_RD_SETTING.consecutive_errors = 100000
+
+        leaves: list[int] = trace.get_leaves()
+        if len(leaves) >= 2 and not self.flag_start_merge:
+            # root node of the merge trace: generated only once
+            self.flag_start_merge = True
+            return self.merge_exp_gen.gen(trace, tuple())
+
+        # Fewer than two leaves, or merge already started: continue the last trace
+        # (after a merge this polishes the merged solution).
+        return self.exp_gen.gen(trace, selection=(-1,))
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/merge.yaml b/rdagent/scenarios/data_science/proposal/exp_gen/merge.yaml
@@ -22,3 +22,33 @@ task: |-
   {% if exp_to_merge_fb_desc %}
   {{ exp_to_merge_fb_desc }}
   {% endif %}
+
+multi_trace: |-
+  {% include "scenarios.data_science.share:scen.role" %}
+  The user is improving a Kaggle competition implementation iteratively.
+  Your task is to merge multiple solutions to create a better version (Combine the strengths of multiple solutions while discarding their weaknesses, to create a new version that is better than any of the given solutions alone). We expect the merged version to perform better than all given solutions.
+
+  You will be given:
+  1) Previous Main Solution: this is the main solution you will build on to create an improved version;
+    - Feedback to the main solutions
+  2) Solution to be merged:  multiple trials of solutions that you will combine with the previous main solution. For each solution, you will be given:
+    - Solution: the approach or method used in this solution.
+    - Successful iterations (the steps or changes that led to the success of the Solution to be merged) or feedback to the Solution to be merged.
+  
+  # Previous Main Solution
+  {{ sota_exp_desc }}
+  {{ sota_exp_fb_desc }}
+
+  # Multiple Trials of Solutions to be merged 
+  {% for exp_to_merge_desc, exp_to_merge_fb_desc in exp_fb_desc_to_merge_list %}
+  ## Trial Index: {{ loop.index }}
+
+  ### Solution Description:
+  {{ exp_to_merge_desc }}
+
+  ### Feedback to the Solution:
+  {% if exp_to_merge_fb_desc %}
+  {{ exp_to_merge_fb_desc }}
+  {% endif %}
+
+  {% endfor %}
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/prompts_selector.yaml b/rdagent/scenarios/data_science/proposal/exp_gen/prompts_selector.yaml