microsoft
diff --git a/rdagent/app/kaggle/conf.py b/rdagent/app/kaggle/conf.py
Lines changed: 4 additions & 4 deletions
diff --git a/rdagent/app/kaggle/model.py b/rdagent/app/kaggle/loop.py
rdagent/app/kaggle/model.py renamed to rdagent/app/kaggle/loop.py
Lines changed: 5 additions & 0 deletions
diff --git a/rdagent/components/coder/model_coder/model.py b/rdagent/components/coder/model_coder/model.py
Lines changed: 13 additions & 9 deletions
diff --git a/rdagent/components/proposal/model_proposal.py b/rdagent/components/proposal/model_proposal.py
Lines changed: 8 additions & 12 deletions
diff --git a/rdagent/components/proposal/prompts.yaml b/rdagent/components/proposal/prompts.yaml
Lines changed: 9 additions & 5 deletions
diff --git a/rdagent/scenarios/data_mining/proposal/model_proposal.py b/rdagent/scenarios/data_mining/proposal/model_proposal.py
Lines changed: 9 additions & 1 deletion
diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py
Lines changed: 1 addition & 1 deletion
diff --git a/rdagent/scenarios/kaggle/experiment/meta_tpl/model_rf.py b/rdagent/scenarios/kaggle/experiment/meta_tpl/model_rf.py
Lines changed: 8 additions & 5 deletions
@@ -13,13 +13,13 @@ class Config:
         """Add 'model_' to the protected namespaces"""
 
     # 1) overriding the default
-    scen: str = "rdagent.scenarios.kaggle.experiment.model_experiment.KGModelScenario"
+    scen: str = "rdagent.scenarios.kaggle.experiment.scenario.KGModelScenario"
     """Scenario class for data mining model"""
 
-    hypothesis_gen: str = "rdagent.scenarios.kaggle.proposal.model_proposal.KGModelHypothesisGen"
+    hypothesis_gen: str = "rdagent.scenarios.kaggle.proposal.proposal.KGHypothesisGen"
     """Hypothesis generation class"""
 
-    hypothesis2experiment: str = "rdagent.scenarios.kaggle.proposal.model_proposal.KGModelHypothesis2Experiment"
+    hypothesis2experiment: str = "rdagent.scenarios.kaggle.proposal.proposal.KGHypothesis2Experiment"
     """Hypothesis to experiment class"""
 
     coder: str = "rdagent.scenarios.kaggle.developer.model_coder.KGModelCoSTEER"
@@ -28,7 +28,7 @@ class Config:
     runner: str = "rdagent.scenarios.kaggle.developer.model_runner.KGModelRunner"
     """Runner class"""
 
-    summarizer: str = "rdagent.scenarios.kaggle.developer.feedback.KGModelHypothesisExperiment2Feedback"
+    summarizer: str = "rdagent.scenarios.kaggle.developer.feedback.KGHypothesisExperiment2Feedback"
     """Summarizer class"""
 
     evolving_n: int = 10
 
@@ -5,13 +5,15 @@
 from rdagent.app.kaggle.conf import PROP_SETTING
 from rdagent.components.workflow.conf import BasePropSetting
 from rdagent.components.workflow.rd_loop import RDLoop
+from rdagent.core.developer import Developer
 from rdagent.core.exception import ModelEmptyError
 from rdagent.core.proposal import (
     Hypothesis2Experiment,
     HypothesisExperiment2Feedback,
     HypothesisGen,
     Trace,
 )
+from rdagent.core.scenario import Scenario
 from rdagent.core.utils import import_class
 from rdagent.log import rdagent_logger as logger
 
@@ -62,4 +64,7 @@ def main(path=None, step_n=None, competition=None):
 
 
 if __name__ == "__main__":
+    from dotenv import load_dotenv
+
+    load_dotenv(override=True)
     fire.Fire(main)
@@ -22,10 +22,10 @@ def __init__(
         self,
         name: str,
         description: str,
-        formulation: str,
         architecture: str,
-        variables: Dict[str, str],
         hyperparameters: Dict[str, str],
+        formulation: str=None,
+        variables: Dict[str, str] = None,
         model_type: Optional[str] = None,
     ) -> None:
         self.name: str = name
@@ -34,17 +34,20 @@ def __init__(
         self.architecture: str = architecture
         self.variables: str = variables
         self.hyperparameters: str = hyperparameters
-        self.model_type: str = model_type  # Tabular for tabular model, TimesSeries for time series model, Graph for graph model, XGBoost for XGBoost model
+        self.model_type: str = (
+            model_type  # Tabular for tabular model, TimesSeries for time series model, Graph for graph model, XGBoost for XGBoost model
+        )
 
     def get_task_information(self):
-        return f"""name: {self.name}
+        task_desc = f"""name: {self.name}
 description: {self.description}
-formulation: {self.formulation}
-architecture: {self.architecture}
-variables: {self.variables}
-hyperparameters: {self.hyperparameters}
-model_type: {self.model_type}
 """
+        task_desc += f"formulation: {self.formulation}\n" if self.formulation else ""
+        task_desc += f"architecture: {self.architecture}\n"
+        task_desc += f"variables: {self.variables}\n" if self.variables else ""
+        task_desc += f"hyperparameters: {self.hyperparameters}\n"
+        task_desc += f"model_type: {self.model_type}\n"
+        return task_desc
 
     @staticmethod
     def from_dict(dict):
@@ -161,4 +164,5 @@ def execute(
         return execution_feedback_str, execution_model_output
 
 
+FeatureExperiment = Experiment
 ModelExperiment = Experiment
@@ -25,12 +25,10 @@ class ModelHypothesisGen(HypothesisGen):
 
     # The following methods are scenario related so they should be implemented in the subclass
     @abstractmethod
-    def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
-        ...
+    def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: ...
 
     @abstractmethod
-    def convert_response(self, response: str) -> ModelHypothesis:
-        ...
+    def convert_response(self, response: str) -> ModelHypothesis: ...
 
     def gen(self, trace: Trace) -> ModelHypothesis:
         context_dict, json_flag = self.prepare_context(trace)
@@ -39,7 +37,7 @@ def gen(self, trace: Trace) -> ModelHypothesis:
             Environment(undefined=StrictUndefined)
             .from_string(ModelHypothesisGen.prompts["hypothesis_gen"]["system_prompt"])
             .render(
-                targets="model",
+                targets="feature engineering and model building",
                 scenario=self.scen.get_scenario_all_desc(),
                 hypothesis_output_format=context_dict["hypothesis_output_format"],
                 hypothesis_specification=context_dict["hypothesis_specification"],
@@ -49,7 +47,7 @@ def gen(self, trace: Trace) -> ModelHypothesis:
             Environment(undefined=StrictUndefined)
             .from_string(ModelHypothesisGen.prompts["hypothesis_gen"]["user_prompt"])
             .render(
-                targets="model",
+                targets="feature engineering and model building",
                 hypothesis_and_feedback=context_dict["hypothesis_and_feedback"],
                 RAG=context_dict["RAG"],
             )
@@ -69,20 +67,18 @@ def __init__(self) -> None:
         super().__init__()
 
     @abstractmethod
-    def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, bool]:
-        ...
+    def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, bool]: ...
 
     @abstractmethod
-    def convert_response(self, response: str, trace: Trace) -> ModelExperiment:
-        ...
+    def convert_response(self, response: str, trace: Trace) -> ModelExperiment: ...
 
     def convert(self, hypothesis: Hypothesis, trace: Trace) -> ModelExperiment:
         context, json_flag = self.prepare_context(hypothesis, trace)
         system_prompt = (
             Environment(undefined=StrictUndefined)
             .from_string(ModelHypothesis2Experiment.prompts["hypothesis2experiment"]["system_prompt"])
             .render(
-                targets="model",
+                targets="feature engineering and model building",
                 scenario=trace.scen.get_scenario_all_desc(),
                 experiment_output_format=context["experiment_output_format"],
             )
@@ -91,7 +87,7 @@ def convert(self, hypothesis: Hypothesis, trace: Trace) -> ModelExperiment:
             Environment(undefined=StrictUndefined)
             .from_string(ModelHypothesis2Experiment.prompts["hypothesis2experiment"]["user_prompt"])
             .render(
-                targets="model",
+                targets="feature engineering and model building",
                 target_hypothesis=context["target_hypothesis"],
                 hypothesis_and_feedback=context["hypothesis_and_feedback"],
                 target_list=context["target_list"],
 
@@ -4,18 +4,22 @@ hypothesis_gen:
     The {{targets}} are used in a certain scenario, the scenario is as follows:
     {{ scenario }}
     The user has made several hypothesis on this scenario and did several evaluation on them. The user will provide this information to you. Check if a new hypothesis has already been proposed. If it is already generated and you agree with it, just use it. If you don't agree, generate a better one.
+    {% if hypothesis_specification %}
     To help you generate new hypothesis, the user has prepared some additional information for you. You should use this information to help generate new {{targets}}.
+    Here are the specifications: {{ hypothesis_specification }}
+    {% endif %}
     Please generate the output following the format and specifications below:
     {{ hypothesis_output_format }}
-    Here are the specifications: {{ hypothesis_specification }}
 
   user_prompt: |-
-    If it is not the first round, then the user has made several hypothesis on this scenario and did several evaluation on them.
+    {% if hypothesis_and_feedback|length == 0 %}    It is the first round of hypothesis generation. The user has no hypothesis on this scenario yet.
+    {% else %}It is not the first round; the user has made several hypotheses on this scenario and done several evaluations on them.
     The former hypothesis and the corresponding feedbacks are as follows (focus on the last one & the new hypothesis that it provides and reasoning to see if you agree):
     {{ hypothesis_and_feedback }}
-    To help you generate new {{targets}}, we have prepared the following information for you:
-    {{ RAG }}
-    Please generate the new hypothesis based on the information above. Also generate the relevant keys for the reasoning and the distilled knowledge that follows. For those keys, in particular for knowledge, explain in the context of the specific scenario to build up domain knowledge in the specific field rather than genearl knowledge.
+    {% endif %}
+    {% if RAG %}To help you generate new {{targets}}, we have prepared the following information for you:
+    {{ RAG }}{% endif %}
+    Please generate the new hypothesis based on the information above. Also generate the relevant keys for the reasoning and the distilled knowledge that follows. For those keys, in particular for knowledge, explain in the context of the specific scenario to build up domain knowledge in the specific field rather than general knowledge.
 
 hypothesis2experiment:
   system_prompt: |-
 
@@ -98,7 +98,15 @@ def convert_response(self, response: str, trace: Trace) -> ModelExperiment:
             hyperparameters = response_dict[model_name]["hyperparameters"]
             model_type = response_dict[model_name]["model_type"]
             tasks.append(
-                ModelTask(model_name, description, formulation, architecture, variables, hyperparameters, model_type)
+                ModelTask(
+                    name=model_name,
+                    description=description,
+                    formulation=formulation,
+                    architecture=architecture,
+                    variables=variables,
+                    hyperparameters=hyperparameters,
+                    model_type=model_type,
+                )
             )
         exp = DMModelExperiment(tasks)
         exp.based_experiments = [t[1] for t in trace.hist if t[2]]
 
@@ -19,7 +19,7 @@
 DIRNAME = Path(__file__).absolute().resolve().parent
 
 
-class KGModelHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
+class KGHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
     """Generated feedbacks on the hypothesis from **Executed** Implementations of different tasks & their comparisons with previous performances"""
 
     def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
 
@@ -9,19 +9,21 @@
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import accuracy_score
 
-def select(X):
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
     """
     Select relevant features. To be used in fit & predict function.
     """
     # For now, we assume all features are relevant. This can be expanded to feature selection logic.
     return X
 
+
 def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame, y_valid: pd.Series):
     """
     Define and train the Random Forest model. Merge feature selection into the pipeline.
     """
     # Initialize the Random Forest model
-    model = RandomForestClassifier(n_estimators=100, random_state=32)  
+    model = RandomForestClassifier(n_estimators=100, random_state=32)
 
     # Select features (if any feature selection is needed)
     X_train_selected = select(X_train)
@@ -37,15 +39,16 @@ def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame, y_vali
 
     return model
 
+
 def predict(model, X):
     """
     Keep feature selection's consistency and make predictions.
     """
     # Select features (if any feature selection is needed)
     X_selected = select(X)
-    
+
     # Predict using the trained model
     y_pred_prob = model.predict_proba(X_selected)[:, 1]
-    
+
     # Apply threshold to get boolean predictions
-    return y_pred_prob > 0.5
+    return y_pred_prob > 0.5