Skip to content

Commit 5b124d7

Browse files
WinstonLiyt and XianBW authored
feat: refine the code in model description and fix some bugs in feedback.py (#288)
* fix some bugs in feedback.py * feat: kaggle templates related (#287) * add kaggle test * kaggle templates changes * rename two files * fix a grammar bug * fix a ci error * fix a bug --------- Co-authored-by: XianBW <36835909+XianBW@users.noreply.github.com>
1 parent 785fdc1 commit 5b124d7

11 files changed

Lines changed: 119 additions & 51 deletions

File tree

rdagent/scenarios/kaggle/developer/feedback.py

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -46,32 +46,6 @@ def process_results(current_result, sota_result):
4646

4747

4848
class KGHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
49-
def get_available_features(self, exp: Experiment):
50-
features = []
51-
52-
for feature_info in exp.experiment_workspace.data_description:
53-
task_info, feature_shape = feature_info
54-
features.append(
55-
{"name": task_info.factor_name, "description": task_info.factor_description, "shape": feature_shape}
56-
)
57-
58-
return features
59-
60-
def get_model_code(self, exp: Experiment):
61-
model_type = exp.sub_tasks[0].model_type if exp.sub_tasks else None
62-
if model_type == "XGBoost":
63-
return exp.sub_workspace_list[0].code_dict.get(
64-
"model_xgb.py"
65-
) # TODO Check if we need to replace this by using RepoAnalyzer
66-
elif model_type == "RandomForest":
67-
return exp.sub_workspace_list[0].code_dict.get("model_rf.py")
68-
elif model_type == "LightGBM":
69-
return exp.sub_workspace_list[0].code_dict.get("model_lgb.py")
70-
elif model_type == "NN":
71-
return exp.sub_workspace_list[0].code_dict.get("model_nn.py")
72-
else:
73-
return None
74-
7549
def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
7650
"""
7751
The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM).
@@ -109,9 +83,10 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
10983
combined_result = process_results(current_result, current_result) # Compare with itself
11084
print("Warning: No previous experiments to compare against. Using current result as baseline.")
11185

112-
available_features = self.get_available_features(exp)
113-
# Get the appropriate model code
114-
model_code = self.get_model_code(exp)
86+
available_features = {
87+
task_info: feature_shape for task_info, feature_shape in exp.experiment_workspace.data_description
88+
}
89+
model_code = exp.experiment_workspace.model_description
11590

11691
# Generate the user prompt based on the action type
11792
if hypothesis.action == "Model tuning":

rdagent/scenarios/kaggle/developer/runner.py

Lines changed: 85 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,27 @@
1+
import json
12
import pickle
23
import shutil
34
from pathlib import Path
45

6+
from jinja2 import Environment, StrictUndefined
7+
58
from rdagent.app.kaggle.conf import KAGGLE_IMPLEMENT_SETTING
69
from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS
710
from rdagent.components.coder.factor_coder.factor import FactorTask
11+
from rdagent.components.coder.model_coder.model import ModelTask
812
from rdagent.components.runner import CachedRunner
913
from rdagent.components.runner.conf import RUNNER_SETTINGS
10-
from rdagent.core.exception import FactorEmptyError, ModelEmptyError
14+
from rdagent.core.exception import CoderError, FactorEmptyError, ModelEmptyError
1115
from rdagent.core.experiment import ASpecificExp
12-
from rdagent.oai.llm_utils import md5_hash
16+
from rdagent.core.prompts import Prompts
17+
from rdagent.oai.llm_utils import APIBackend, md5_hash
1318
from rdagent.scenarios.kaggle.experiment.kaggle_experiment import (
1419
KGFactorExperiment,
1520
KGModelExperiment,
1621
)
1722

23+
prompt_dict = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
24+
1825

1926
class KGCachedRunner(CachedRunner[ASpecificExp]):
2027
def build_from_SOTA(self, exp: ASpecificExp) -> None:
@@ -23,7 +30,7 @@ def build_from_SOTA(self, exp: ASpecificExp) -> None:
2330
exp.experiment_workspace.data_description = exp.based_experiments[-1].experiment_workspace.data_description
2431
exp.experiment_workspace.model_description = exp.based_experiments[
2532
-1
26-
].experiment_workspace.model_description
33+
].experiment_workspace.model_description.copy()
2734

2835
def get_cache_key(self, exp: ASpecificExp) -> str:
2936
codes = []
@@ -38,22 +45,19 @@ def get_cache_key(self, exp: ASpecificExp) -> str:
3845
class KGModelRunner(KGCachedRunner[KGModelExperiment]):
3946
def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
4047
self.build_from_SOTA(exp)
41-
if exp.sub_workspace_list[0].target_task.model_type == "XGBoost":
42-
if exp.sub_workspace_list[0].code_dict == {}:
43-
raise ModelEmptyError("No model is implemented")
44-
exp.experiment_workspace.inject_code(**{"model_xgb.py": exp.sub_workspace_list[0].code_dict["model.py"]})
45-
elif exp.sub_workspace_list[0].target_task.model_type == "RandomForest":
46-
if exp.sub_workspace_list[0].code_dict == {}:
47-
raise ModelEmptyError("No model is implemented")
48-
exp.experiment_workspace.inject_code(**{"model_rf.py": exp.sub_workspace_list[0].code_dict["model.py"]})
49-
elif exp.sub_workspace_list[0].target_task.model_type == "LightGBM":
50-
if exp.sub_workspace_list[0].code_dict == {}:
51-
raise ModelEmptyError("No model is implemented")
52-
exp.experiment_workspace.inject_code(**{"model_lgb.py": exp.sub_workspace_list[0].code_dict["model.py"]})
53-
elif exp.sub_workspace_list[0].target_task.model_type == "NN":
54-
if exp.sub_workspace_list[0].code_dict == {}:
55-
raise ModelEmptyError("No model is implemented")
56-
exp.experiment_workspace.inject_code(**{"model_nn.py": exp.sub_workspace_list[0].code_dict["model.py"]})
48+
49+
sub_ws = exp.sub_workspace_list[0]
50+
model_type = sub_ws.target_task.model_type
51+
52+
if sub_ws.code_dict == {}:
53+
raise ModelEmptyError("No model is implemented.")
54+
else:
55+
model_file_name = f"model_{model_type.lower()}.py"
56+
exp.experiment_workspace.inject_code(**{model_file_name: sub_ws.code_dict["model.py"]})
57+
58+
model_description = sub_ws.target_task.get_task_information()
59+
exp.experiment_workspace.model_description[model_type] = model_description
60+
5761
if RUNNER_SETTINGS.cache_result:
5862
cache_hit, result = self.get_cache_result(exp)
5963
if cache_hit:
@@ -72,6 +76,48 @@ def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
7276

7377

7478
class KGFactorRunner(KGCachedRunner[KGFactorExperiment]):
79+
def extract_model_task_from_code(self, code: str) -> str:
80+
sys_prompt = (
81+
Environment(undefined=StrictUndefined)
82+
.from_string(prompt_dict["extract_model_task_from_code"]["system"])
83+
.render()
84+
)
85+
86+
user_prompt = (
87+
Environment(undefined=StrictUndefined)
88+
.from_string(prompt_dict["extract_model_task_from_code"]["user"])
89+
.render(file_content=code)
90+
)
91+
92+
model_task_description = APIBackend().build_messages_and_create_chat_completion(
93+
user_prompt=user_prompt,
94+
system_prompt=sys_prompt,
95+
json_mode=True,
96+
)
97+
98+
try:
99+
response_json_analysis = json.loads(model_task_description)
100+
task_desc = f"""name: {response_json_analysis['name']}
101+
description: {response_json_analysis['description']}
102+
"""
103+
task_desc += (
104+
f"formulation: {response_json_analysis['formulation']}\n"
105+
if response_json_analysis.get("formulation")
106+
else ""
107+
)
108+
task_desc += f"architecture: {response_json_analysis['architecture']}\n"
109+
task_desc += (
110+
f"variables: {json.dumps(response_json_analysis['variables'], indent=4)}\n"
111+
if response_json_analysis.get("variables")
112+
else ""
113+
)
114+
task_desc += f"hyperparameters: {json.dumps(response_json_analysis['hyperparameters'], indent=4)}\n"
115+
task_desc += f"model_type: {response_json_analysis['model_type']}\n"
116+
except json.JSONDecodeError:
117+
task_desc = "Failed to parse LLM's response as JSON"
118+
119+
return task_desc
120+
75121
def init_develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
76122
"""
77123
For the initial development, the experiment serves as a benchmark for feature engineering.
@@ -100,6 +146,22 @@ def init_develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
100146
feature_shape = org_data.shape[-1]
101147
exp.experiment_workspace.data_description.append((sub_task.get_task_information(), feature_shape))
102148

149+
sub_model_1_description = (
150+
self.extract_model_task_from_code(
151+
(exp.experiment_workspace.workspace_path / "model" / "model_randomforest.py").read_text()
152+
)
153+
+ f"""code: { (exp.experiment_workspace.workspace_path / "model" / "model_randomforest.py").read_text()}"""
154+
)
155+
sub_model_2_description = (
156+
self.extract_model_task_from_code(
157+
(exp.experiment_workspace.workspace_path / "model" / "model_xgboost.py").read_text()
158+
)
159+
+ f"""code: { (exp.experiment_workspace.workspace_path / "model" / "model_xgboost.py").read_text()}"""
160+
)
161+
162+
exp.experiment_workspace.model_description["XGBoost"] = sub_model_1_description
163+
exp.experiment_workspace.model_description["RandomForest"] = sub_model_2_description
164+
103165
if RUNNER_SETTINGS.cache_result:
104166
self.dump_cache_result(exp, result)
105167

@@ -133,7 +195,11 @@ def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
133195

134196
result = exp.experiment_workspace.execute(run_env=env_to_use)
135197

198+
if result is None:
199+
raise CoderError("No result is returned from the experiment workspace")
200+
136201
exp.result = result
202+
137203
if RUNNER_SETTINGS.cache_result:
138204
self.dump_cache_result(exp, result)
139205

rdagent/scenarios/kaggle/experiment/meta_tpl/model/model_rf.py renamed to rdagent/scenarios/kaggle/experiment/meta_tpl/model/model_randomforest.py

File renamed without changes.

rdagent/scenarios/kaggle/experiment/meta_tpl/model/model_xgb.py renamed to rdagent/scenarios/kaggle/experiment/meta_tpl/model/model_xgboost.py

File renamed without changes.

rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_rf.py renamed to rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_randomforest.py

File renamed without changes.

rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_xgb.py renamed to rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_xgboost.py

File renamed without changes.

rdagent/scenarios/kaggle/experiment/prompts.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ kg_description_template:
88
"Competition Type": "The type of competition, e.g., 'Classification', 'Regression', 'Clustering', 'Prediction", "Time-Series Forecasting",
99
"Competition Description": "A brief description of the competition",
1010
"Target Description": "A description of the target variable to be predicted",
11-
"Competition Features": "A dict of relevant features used in the competition and their descriptions (if available)", # if you are not sure about the meaning of the feature, please add a (guess) before the description. Importantly, your feature name should be exactly the same as the feature name in the dataset!
1211
}
1312
Since these might be very similar column names in data like one_hot_encoded columns, you can use some regex to group them together.
1413

rdagent/scenarios/kaggle/experiment/scenario.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ def _analysis_competition_description(self):
6969
self.competition_type = response_json_analysis.get("Competition Type", "No type provided")
7070
self.competition_description = response_json_analysis.get("Competition Description", "No description provided")
7171
self.target_description = response_json_analysis.get("Target Description", "No target provided")
72-
self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
7372
self.competition_features = self.source_data
7473

7574
@property

rdagent/scenarios/kaggle/experiment/workspace.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def __init__(self, template_folder_path: Path, *args, **kwargs) -> None:
2929
super().__init__(*args, **kwargs)
3030
self.inject_code_from_folder(template_folder_path)
3131
self.data_description: list[str] = []
32-
self.model_description: str = ""
32+
self.model_description: dict[str, str] = {}
3333

3434
def generate_preprocess_data(
3535
self,

rdagent/scenarios/kaggle/prompts.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,3 +263,27 @@ feature_selection_feedback_generation:
263263
4. Are there any domain-specific considerations that should inform our feature selection?
264264
265265
Remember to focus on the select() method in the model code, as this is where feature selection is implemented.
266+
267+
extract_model_task_from_code:
268+
system: |-
269+
You are an expert in analyzing code for machine learning models.
270+
user: |-
271+
Given the following code, summarize the machine learning model including:
272+
- Model architecture
273+
- Hyperparameters
274+
- Formulation and variables
275+
- Model type (one of XGBoost, RandomForest, LightGBM, NN)
276+
277+
Code:
278+
{{ file_content }}
279+
280+
Return the information in JSON format with the following structure:
281+
{
282+
"name": "",
283+
"description": "",
284+
"architecture": "",
285+
"hyperparameters": {},
286+
"formulation": "",
287+
"variables": {},
288+
"model_type": ""
289+
}

0 commit comments

Comments (0)