microsoft
diff --git a/‎rdagent/app/data_science/conf.py‎
Lines changed: 20 additions & 0 deletions b/‎rdagent/app/data_science/conf.py‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎rdagent/app/data_science/loop.py‎
Lines changed: 37 additions & 3 deletions b/‎rdagent/app/data_science/loop.py‎
Lines changed: 37 additions & 3 deletions
diff --git a/‎rdagent/components/coder/data_science/pipeline/eval.py‎
Lines changed: 7 additions & 0 deletions b/‎rdagent/components/coder/data_science/pipeline/eval.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎rdagent/core/proposal.py‎
Lines changed: 12 additions & 1 deletion b/‎rdagent/core/proposal.py‎
Lines changed: 12 additions & 1 deletion
diff --git a/‎rdagent/scenarios/data_science/dev/feedback.py‎
Lines changed: 3 additions & 1 deletion b/‎rdagent/scenarios/data_science/dev/feedback.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎rdagent/scenarios/data_science/proposal/exp_gen/base.py‎
Lines changed: 20 additions & 6 deletions b/‎rdagent/scenarios/data_science/proposal/exp_gen/base.py‎
Lines changed: 20 additions & 6 deletions
diff --git a/‎rdagent/scenarios/data_science/proposal/exp_gen/ckp_select.py‎
Lines changed: 149 additions & 0 deletions b/‎rdagent/scenarios/data_science/proposal/exp_gen/ckp_select.py‎
Lines changed: 149 additions & 0 deletions
@@ -14,6 +14,9 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     scen: str = "rdagent.scenarios.data_science.scen.KaggleScen"
     """Scenario class for data mining model"""
 
+    hypothesis_gen: str = "rdagent.scenarios.data_science.proposal.exp_gen.DSExpGen"
+    """Hypothesis generation class"""
+
     ## Workflow Related
     consecutive_errors: int = 5
 
@@ -47,6 +50,20 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     enable_doc_dev: bool = False
     model_dump_check_level: Literal["medium", "high"] = "medium"
 
+    ### selector related
+
+    #### checkpoint selector related
+    # selector_name: str = "latest"
+    selector_name: str = "rdagent.scenarios.data_science.proposal.exp_gen.ckp_select.LatestCKPSelector"
+    """The name of the selector to use"""
+    sota_count_window: int = 5
+    """The number of trials to consider for SOTA count"""
+    sota_count_threshold: int = 1
+    """The threshold for SOTA count"""
+
+    #### SOTA experiment selector related
+    sota_exp_selector_name: str = "rdagent.scenarios.data_science.proposal.exp_gen.sota_exp_select.GlobalSOTASelector"
+    """The name of the SOTA experiment selector to use"""
     ### knowledge base
     enable_knowledge_base: bool = False
     knowledge_base_version: str = "v1"
@@ -65,5 +82,8 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     """We'll use f"{DS_RD_SETTING.local_data_path}/{DS_RD_SETTING.eval_sub_dir}/{competition}"
     to find the script to evaluate the submission on test"""
 
+    ### inject diverse
+    enable_inject_diverse: bool = False
+
 
 DS_RD_SETTING = DataScienceBasePropSetting()
@@ -32,10 +32,31 @@
 from rdagent.scenarios.data_science.dev.runner import DSCoSTEERRunner
 from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
 from rdagent.scenarios.data_science.proposal.exp_gen import DSExpGen, DSTrace
+from rdagent.scenarios.data_science.proposal.exp_gen.ckp_select import (
+    BackJumpCKPSelector,
+    LatestCKPSelector,
+    SOTAJumpCKPSelector,
+)
 from rdagent.scenarios.data_science.proposal.exp_gen.idea_pool import DSKnowledgeBase
-from rdagent.scenarios.data_science.proposal.exp_gen.select import LatestCKPSelector
+from rdagent.scenarios.data_science.proposal.exp_gen.sota_exp_select import (
+    AutoSOTAexpSelector,
+    BestValidSelector,
+    GlobalSOTASelector,
+)
 from rdagent.scenarios.kaggle.kaggle_crawler import download_data
 
+# Short-name -> class lookup tables for the checkpoint selectors and the SOTA-experiment selectors.
+# NOTE(review): the only visible uses of these maps are commented out in `DataScienceRDLoop.__init__`,
+# which resolves both selectors through `import_class(...)` on a dotted class path instead; the short
+# keys below also do not match the dotted-path defaults in the settings. Confirm whether they are still needed.
+CKP_SELECTOR_NAME_MAP = {
+    "latest": LatestCKPSelector,
+    "sota_jump": SOTAJumpCKPSelector,
+    "back_jump": BackJumpCKPSelector,
+}
+
+SOTA_EXP_SELECTOR_NAME_MAP = {
+    "global_sota": GlobalSOTASelector,
+    "auto_sota": AutoSOTAexpSelector,
+    "best_valid_sota": BestValidSelector,
+}
+
 
 class DataScienceRDLoop(RDLoop):
     skip_loop_error = (CoderError, RunnerError)
@@ -49,8 +70,15 @@ def __init__(self, PROP_SETTING: BasePropSetting):
 
         # 2) task generation from a complete solution
         # self.exp_gen: ExpGen = import_class(PROP_SETTING.exp_gen)(scen)
-        self.ckp_selector = LatestCKPSelector()
-        self.exp_gen = DSExpGen(scen)
+
+        # self.ckp_selector = CKP_SELECTOR_NAME_MAP[DS_RD_SETTING.selector_name]()
+        # self.sota_exp_selector = SOTA_EXP_SELECTOR_NAME_MAP[DS_RD_SETTING.sota_exp_selector_name]()
+        self.ckp_selector = import_class(PROP_SETTING.selector_name)()
+        self.sota_exp_selector = import_class(PROP_SETTING.sota_exp_selector_name)()
+
+        self.exp_gen = import_class(PROP_SETTING.hypothesis_gen)(scen)
+
+        # coders
         self.data_loader_coder = DataLoaderCoSTEER(scen)
         self.feature_coder = FeatureCoSTEER(scen)
         self.model_coder = ModelCoSTEER(scen)
@@ -76,6 +104,12 @@ def __init__(self, PROP_SETTING: BasePropSetting):
         super(RDLoop, self).__init__()
 
     def direct_exp_gen(self, prev_out: dict[str, Any]):
+
+        # set the SOTA experiment to submit
+        sota_exp_to_submit = self.sota_exp_selector.get_sota_exp_to_submit(self.trace)
+        self.trace.set_sota_exp_to_submit(sota_exp_to_submit)
+
+        # set the checkpoint to start from
         selection = self.ckp_selector.get_selection(self.trace)
         exp = self.exp_gen.gen(self.trace, selection)
         logger.log_object(exp)
 
@@ -127,6 +127,13 @@ def evaluate(
 
         eda_output = implementation.file_dict.get("EDA.md", None)
 
+        eda_output = implementation.file_dict.get("EDA.md", None)
+
+        if not isinstance(implementation, FBWorkspace):
+            eda_output = None
+        else:
+            eda_output = implementation.file_dict.get("EDA.md", None)
+
         system_prompt = T(".prompts:pipeline_eval.system").r(
             scenario=self.scen.get_scenario_all_desc(eda_output=eda_output),
             task_desc=target_task.get_task_information(),
 
@@ -149,11 +149,22 @@ def get_selection(self, trace: Trace) -> tuple[int, ...] | None:
         - `(idx, )` represents starting from the `idx`-th trial in the trace.
         - `None` represents starting from scratch (start a new trace)
 
-
         - More advanced selection strategies in `ckp_select.py`
         """
 
 
+class SOTAexpSelector(ABC):
+    """
+    Interface for strategies that select the SOTA experiment from the trace to submit.
+
+    The class derives from `ABC` so that `@abstractmethod` is actually enforced: a subclass
+    that does not implement `get_sota_exp_to_submit` cannot be instantiated.
+    """
+
+    @abstractmethod
+    def get_sota_exp_to_submit(self, trace: Trace) -> Experiment | None:
+        """
+        Select the SOTA experiment from the trace to submit.
+
+        Parameters
+        ----------
+        trace : Trace
+            The trace to select from.
+
+        Returns
+        -------
+        Experiment | None
+            The experiment to submit, or `None` when the selector selects nothing.
+        """
+
+
 class ExpGen(ABC):
 
     def __init__(self, scen: Scenario) -> None:
 
@@ -31,9 +31,11 @@ def generate_feedback(self, exp: DSExperiment, trace: DSTrace) -> ExperimentFeed
             exp=sota_exp, heading="SOTA of previous exploration of the scenario"
         )
 
+        last_exp = trace.last_exp()
+
         # Get feedback description using shared template
         feedback_desc = T("scenarios.data_science.share:describe.feedback").r(
-            exp_and_feedback=(trace.hist[-1] if trace.hist else None), heading="Previous Trial Feedback"
+            exp_and_feedback=trace.hist[-1] if trace.hist else None, heading="Previous Trial Feedback"
         )
 
         # TODO:
 
@@ -61,10 +61,17 @@ def __init__(self, scen: DataScienceScen, knowledge_base: KnowledgeBase | None =
 
         self.knowledge_base = knowledge_base
 
+        self.sub_trace_count: int = 0
+
         self.current_selection: tuple[int, ...] = (-1,)
 
+        self.sota_exp_to_submit: DSExperiment | None = None  # grab the global best exp to submit
+
     COMPLETE_ORDER = ("DataLoadSpec", "FeatureEng", "Model", "Ensemble", "Workflow")
 
+    def set_sota_exp_to_submit(self, exp: DSExperiment | None) -> None:
+        """
+        Record the experiment chosen for submission.
+
+        `None` is accepted because the SOTA selector may return no experiment; the stored
+        attribute `sota_exp_to_submit` is itself typed `DSExperiment | None`.
+        """
+        self.sota_exp_to_submit = exp
+
     def get_current_selection(self) -> tuple[int, ...]:
         return self.current_selection
 
@@ -127,15 +134,22 @@ def retrieve_search_list(
         list[tuple[DSExperiment, ExperimentFeedback]]
             The search list.
         """
+        if search_type == "all":
+            return self.hist
 
-        if selection is None:
-            selection = self.get_current_selection()
+        elif search_type == "ancestors":
 
-        if selection is None:
-            # selection is None, which means we switch to a new trace, which is not implemented yet
-            return []
+            if selection is None:
+                selection = self.get_current_selection()
 
-        return self.collect_all_ancestors(selection) if search_type == "ancestors" else self.hist
+            if len(selection) == 0:
+                # selection is (), which means we switch to a new trace
+                return []
+
+            return self.collect_all_ancestors(selection)
+
+        else:
+            raise ValueError(f"Invalid search type: {search_type}")
 
     def collect_all_ancestors(
         self,
 
@@ -0,0 +1,149 @@
+import random
+
+from rdagent.app.data_science.conf import DS_RD_SETTING
+from rdagent.core.proposal import CheckpointSelector, Trace
+from rdagent.log import rdagent_logger as logger
+
+# # TODO: more advanced selector
+# # TODO/Discussion: load selector function here or define selector class in `proposal.py`?
+
+
+class LatestCKPSelector(CheckpointSelector):
+    """
+    Always continue from the most recent trial.
+
+    -`(-1, )` represents starting from the latest trial in the trace
+    """
+
+    def __init__(self) -> None:
+        # Plain string literal: there is nothing to interpolate, so no f-prefix is needed.
+        logger.info("Using latest selector by default")
+
+    def get_selection(self, trace: Trace) -> tuple[int, ...]:
+        """Return `(-1,)` unconditionally; `trace` is not inspected."""
+        return (-1,)
+
+
+class SOTAJumpCKPSelector(CheckpointSelector):
+    """
+    SOTA jump policy:
+    if the number of SOTA trials in the most recent window is below a threshold, jump to a new sub-trace;
+    otherwise, continue the current latest trial.
+
+    The window size and the threshold are read from `DS_RD_SETTING.sota_count_window` and
+    `DS_RD_SETTING.sota_count_threshold` when the selector is constructed.
+    """
+
+    def __init__(
+        self,
+    ) -> None:
+        self.SOTA_COUNT_WINDOW = DS_RD_SETTING.sota_count_window
+        self.SOTA_COUNT_THRESHOLD = DS_RD_SETTING.sota_count_threshold
+
+        logger.info(
+            f"Using SOTA-jump selector with window {self.SOTA_COUNT_WINDOW} and threshold {self.SOTA_COUNT_THRESHOLD}"
+        )
+
+    def get_selection(self, trace: Trace) -> tuple[int, ...]:
+        """
+        Decide whether to keep extending the current sub-trace or to start a new one.
+
+        Parameters
+        ----------
+        trace : Trace
+            The trace to inspect. Its `sub_trace_count` is incremented when a jump is made.
+
+        Returns
+        -------
+        tuple[int, ...]
+            `(-1,)` to continue from the latest trial, or `()` to start a new sub-trace.
+        """
+        current_trace = trace.retrieve_search_list(search_type="ancestors")
+
+        # Guard clause: the policy only applies once the current sub-trace is longer than the window.
+        if not trace.hist or len(current_trace) <= self.SOTA_COUNT_WINDOW:
+            logger.info("Not enough history to make a decision, continue the current latest trial")
+            return (-1,)
+
+        all_exp_list = trace.experiment_and_feedback_list_after_init(return_type="all", search_type="ancestors")
+        exp_list_in_window = all_exp_list[-self.SOTA_COUNT_WINDOW :]
+
+        # Number of trials in the window whose feedback decision is truthy.
+        sota_count = sum(1 for _, fb in exp_list_in_window if fb.decision)
+
+        if sota_count < self.SOTA_COUNT_THRESHOLD:
+            trace.sub_trace_count += 1
+            logger.info(
+                f"SOTA count {sota_count} is below threshold {self.SOTA_COUNT_THRESHOLD}, jump to a new sub-trace"
+            )
+            logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+            return ()
+
+        # This branch also covers sota_count == threshold, hence "not below" rather than "above".
+        logger.info(
+            f"SOTA count {sota_count} is not below threshold {self.SOTA_COUNT_THRESHOLD}, continue the current latest trial"
+        )
+        logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+        return (-1,)
+
+
+class BackJumpCKPSelector(CheckpointSelector):
+    """
+    back-jump policy:
+    if the number of SOTA trials in the most recent window is below a threshold,
+    with 50% probability, reboot a new sub-trace
+    with 50% probability, jump back to the second-to-last SOTA trial (we assume the latest SOTA trial is not a good enough selection)
+
+    If fewer than two SOTA trials exist, the jump-back branch also falls back to a new sub-trace.
+    The window size and the threshold are read from `DS_RD_SETTING.sota_count_window` and
+    `DS_RD_SETTING.sota_count_threshold` when the selector is constructed.
+    """
+
+    def __init__(
+        self,
+    ) -> None:
+        self.SOTA_COUNT_WINDOW = DS_RD_SETTING.sota_count_window
+        self.SOTA_COUNT_THRESHOLD = DS_RD_SETTING.sota_count_threshold
+
+        logger.info(
+            f"Using back-jump selector with window {self.SOTA_COUNT_WINDOW} and threshold {self.SOTA_COUNT_THRESHOLD}"
+        )
+
+    def get_selection(self, trace: Trace) -> tuple[int, ...]:
+        """
+        Decide whether to continue, reboot a new sub-trace, or jump back to an earlier SOTA trial.
+
+        Parameters
+        ----------
+        trace : Trace
+            The trace to inspect. Its `sub_trace_count` is incremented when a new sub-trace is started.
+
+        Returns
+        -------
+        tuple[int, ...]
+            `(-1,)` to continue from the latest trial, `()` to start a new sub-trace, or
+            `(idx,)` where `idx` is the position in `trace.hist` of the second-to-last SOTA trial.
+        """
+        current_trace = trace.retrieve_search_list(search_type="ancestors")
+
+        # Guard clause: the policy only applies once the current sub-trace is longer than the window.
+        if not trace.hist or len(current_trace) <= self.SOTA_COUNT_WINDOW:
+            logger.info("Not enough history to make a decision, continue the current latest trial")
+            logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+            return (-1,)
+
+        all_exp_list = trace.experiment_and_feedback_list_after_init(return_type="all", search_type="ancestors")
+        exp_list_in_window = all_exp_list[-self.SOTA_COUNT_WINDOW :]
+
+        # Number of trials in the window whose feedback decision is truthy.
+        sota_count = sum(1 for _, fb in exp_list_in_window if fb.decision)
+
+        if sota_count >= self.SOTA_COUNT_THRESHOLD:
+            # This branch also covers sota_count == threshold, hence "not below" rather than "above".
+            logger.info(
+                f"SOTA count {sota_count} is not below threshold {self.SOTA_COUNT_THRESHOLD}, continue the current latest trial"
+            )
+            logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+            return (-1,)
+
+        # Below threshold: coin flip between rebooting and jumping back.
+        if random.random() < 0.5:
+            trace.sub_trace_count += 1
+            logger.info(
+                f"SOTA count {sota_count} is below threshold {self.SOTA_COUNT_THRESHOLD}, jump a new sub-trace"
+            )
+            return ()  # reboot a new sub-trace
+
+        logger.info(
+            f"SOTA count {sota_count} is below threshold {self.SOTA_COUNT_THRESHOLD}, jump back to the last second SOTA in hist (may not in current sub-trace)"
+        )
+        # The SOTA list is taken over the whole history, not only over the current sub-trace.
+        sota_exp_list = trace.experiment_and_feedback_list_after_init(return_type="sota", search_type="all")
+        if len(sota_exp_list) > 1:
+            last_second_sota_idx = trace.hist.index(sota_exp_list[-2])
+            logger.info(
+                f"jump back to the last second SOTA in hist (may not in current sub-trace), index: {last_second_sota_idx}"
+            )
+            logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+            return (last_second_sota_idx,)
+
+        # Fewer than two SOTA trials exist, so there is nothing to jump back to.
+        trace.sub_trace_count += 1
+        logger.info(
+            f"SOTA count {sota_count} is below threshold {self.SOTA_COUNT_THRESHOLD}, jump a new sub-trace"
+        )
+        logger.info(f"current sub-trace count: {trace.sub_trace_count}")
+        return ()  # reboot a new sub-trace
+
+
+# TODO: implement these selectors and more
Original file line number	Diff line number	Diff line change
`@@ -31,9 +31,11 @@ def generate_feedback(self, exp: DSExperiment, trace: DSTrace) -> ExperimentFeed`
`31`	`31`	`exp=sota_exp, heading="SOTA of previous exploration of the scenario"`
`32`	`32`	`)`
`33`	`33`
	`34`	`+ last_exp = trace.last_exp()`
	`35`	`+`
`34`	`36`	`# Get feedback description using shared template`
`35`	`37`	`feedback_desc = T("scenarios.data_science.share:describe.feedback").r(`
`36`		`- exp_and_feedback=(trace.hist[-1] if trace.hist else None), heading="Previous Trial Feedback"`
	`38`	`+ exp_and_feedback=trace.hist[-1] if trace.hist else None, heading="Previous Trial Feedback"`
`37`	`39`	`)`
`38`	`40`
`39`	`41`	`# TODO:`