1+ import json
12import pickle
23import shutil
34from pathlib import Path
45
6+ from jinja2 import Environment , StrictUndefined
7+
58from rdagent .app .kaggle .conf import KAGGLE_IMPLEMENT_SETTING
69from rdagent .components .coder .factor_coder .config import FACTOR_IMPLEMENT_SETTINGS
710from rdagent .components .coder .factor_coder .factor import FactorTask
11+ from rdagent .components .coder .model_coder .model import ModelTask
812from rdagent .components .runner import CachedRunner
913from rdagent .components .runner .conf import RUNNER_SETTINGS
10- from rdagent .core .exception import FactorEmptyError , ModelEmptyError
14+ from rdagent .core .exception import CoderError , FactorEmptyError , ModelEmptyError
1115from rdagent .core .experiment import ASpecificExp
12- from rdagent .oai .llm_utils import md5_hash
16+ from rdagent .core .prompts import Prompts
17+ from rdagent .oai .llm_utils import APIBackend , md5_hash
1318from rdagent .scenarios .kaggle .experiment .kaggle_experiment import (
1419 KGFactorExperiment ,
1520 KGModelExperiment ,
1621)
1722
23+ prompt_dict = Prompts (file_path = Path (__file__ ).parent .parent / "prompts.yaml" )
24+
1825
1926class KGCachedRunner (CachedRunner [ASpecificExp ]):
2027 def build_from_SOTA (self , exp : ASpecificExp ) -> None :
@@ -23,7 +30,7 @@ def build_from_SOTA(self, exp: ASpecificExp) -> None:
2330 exp .experiment_workspace .data_description = exp .based_experiments [- 1 ].experiment_workspace .data_description
2431 exp .experiment_workspace .model_description = exp .based_experiments [
2532 - 1
26- ].experiment_workspace .model_description
33+ ].experiment_workspace .model_description . copy ()
2734
2835 def get_cache_key (self , exp : ASpecificExp ) -> str :
2936 codes = []
@@ -38,22 +45,19 @@ def get_cache_key(self, exp: ASpecificExp) -> str:
3845class KGModelRunner (KGCachedRunner [KGModelExperiment ]):
3946 def develop (self , exp : KGModelExperiment ) -> KGModelExperiment :
4047 self .build_from_SOTA (exp )
41- if exp .sub_workspace_list [0 ].target_task .model_type == "XGBoost" :
42- if exp .sub_workspace_list [0 ].code_dict == {}:
43- raise ModelEmptyError ("No model is implemented" )
44- exp .experiment_workspace .inject_code (** {"model_xgb.py" : exp .sub_workspace_list [0 ].code_dict ["model.py" ]})
45- elif exp .sub_workspace_list [0 ].target_task .model_type == "RandomForest" :
46- if exp .sub_workspace_list [0 ].code_dict == {}:
47- raise ModelEmptyError ("No model is implemented" )
48- exp .experiment_workspace .inject_code (** {"model_rf.py" : exp .sub_workspace_list [0 ].code_dict ["model.py" ]})
49- elif exp .sub_workspace_list [0 ].target_task .model_type == "LightGBM" :
50- if exp .sub_workspace_list [0 ].code_dict == {}:
51- raise ModelEmptyError ("No model is implemented" )
52- exp .experiment_workspace .inject_code (** {"model_lgb.py" : exp .sub_workspace_list [0 ].code_dict ["model.py" ]})
53- elif exp .sub_workspace_list [0 ].target_task .model_type == "NN" :
54- if exp .sub_workspace_list [0 ].code_dict == {}:
55- raise ModelEmptyError ("No model is implemented" )
56- exp .experiment_workspace .inject_code (** {"model_nn.py" : exp .sub_workspace_list [0 ].code_dict ["model.py" ]})
48+
49+ sub_ws = exp .sub_workspace_list [0 ]
50+ model_type = sub_ws .target_task .model_type
51+
52+ if sub_ws .code_dict == {}:
53+ raise ModelEmptyError ("No model is implemented." )
54+ else :
55+ model_file_name = f"model_{ model_type .lower ()} .py"
56+ exp .experiment_workspace .inject_code (** {model_file_name : sub_ws .code_dict ["model.py" ]})
57+
58+ model_description = sub_ws .target_task .get_task_information ()
59+ exp .experiment_workspace .model_description [model_type ] = model_description
60+
5761 if RUNNER_SETTINGS .cache_result :
5862 cache_hit , result = self .get_cache_result (exp )
5963 if cache_hit :
@@ -72,6 +76,48 @@ def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
7276
7377
7478class KGFactorRunner (KGCachedRunner [KGFactorExperiment ]):
def extract_model_task_from_code(self, code: str) -> str:
    """Ask the LLM to describe the model implemented in ``code``.

    The system and user prompt templates are taken from the
    ``extract_model_task_from_code`` entry of ``prompt_dict``; the user
    template is rendered with ``file_content=code``. The chat completion is
    requested in JSON mode and the reply is flattened into ``key: value``
    lines.

    Args:
        code: Source text of the model file to describe.

    Returns:
        A newline-terminated description containing ``name``,
        ``description``, ``architecture``, ``hyperparameters`` and
        ``model_type`` lines, plus ``formulation`` and ``variables`` lines
        when the reply provides non-empty values for them. When the reply
        is not valid JSON, is not a JSON object, or lacks a required field,
        a one-line failure message is returned instead of raising.
    """
    prompts = prompt_dict["extract_model_task_from_code"]
    # StrictUndefined turns a missing template variable into an error
    # instead of silently rendering an empty string.
    env = Environment(undefined=StrictUndefined)
    sys_prompt = env.from_string(prompts["system"]).render()
    user_prompt = env.from_string(prompts["user"]).render(file_content=code)

    reply = APIBackend().build_messages_and_create_chat_completion(
        user_prompt=user_prompt,
        system_prompt=sys_prompt,
        json_mode=True,
    )

    try:
        fields = json.loads(reply)
    except json.JSONDecodeError:
        return "Failed to parse LLM's response as JSON"

    # JSON mode does not guarantee the schema: the reply may be a list or a
    # scalar, or an object without the keys the description needs.
    if not isinstance(fields, dict):
        return "Failed to parse LLM's response as JSON: expected a JSON object"
    required = ("name", "description", "architecture", "hyperparameters", "model_type")
    missing = [key for key in required if key not in fields]
    if missing:
        return f"LLM's response is missing required fields: {', '.join(missing)}"

    lines = [
        f"name: {fields['name']}",
        f"description: {fields['description']}",
    ]
    # "formulation" and "variables" are optional; skip them when absent or empty.
    if fields.get("formulation"):
        lines.append(f"formulation: {fields['formulation']}")
    lines.append(f"architecture: {fields['architecture']}")
    if fields.get("variables"):
        lines.append(f"variables: {json.dumps(fields['variables'], indent=4)}")
    lines.append(f"hyperparameters: {json.dumps(fields['hyperparameters'], indent=4)}")
    lines.append(f"model_type: {fields['model_type']}")
    return "\n".join(lines) + "\n"
120+
75121 def init_develop (self , exp : KGFactorExperiment ) -> KGFactorExperiment :
76122 """
77123 For the initial development, the experiment serves as a benchmark for feature engineering.
@@ -100,6 +146,22 @@ def init_develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
100146 feature_shape = org_data .shape [- 1 ]
101147 exp .experiment_workspace .data_description .append ((sub_task .get_task_information (), feature_shape ))
102148
# Describe the two baseline model files found under the workspace's
# "model" directory. Each file is read once; its text is both sent to
# the LLM for a task summary and appended verbatim after "code: ".
model_dir = exp.experiment_workspace.workspace_path / "model"
random_forest_code = (model_dir / "model_randomforest.py").read_text()
xgboost_code = (model_dir / "model_xgboost.py").read_text()

random_forest_description = (
    self.extract_model_task_from_code(random_forest_code) + f"code: {random_forest_code}"
)
xgboost_description = self.extract_model_task_from_code(xgboost_code) + f"code: {xgboost_code}"

# Store each description under the model type it was extracted from
# (the keys must match the file the text came from).
exp.experiment_workspace.model_description["XGBoost"] = xgboost_description
exp.experiment_workspace.model_description["RandomForest"] = random_forest_description
164+
103165 if RUNNER_SETTINGS .cache_result :
104166 self .dump_cache_result (exp , result )
105167
@@ -133,7 +195,11 @@ def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
133195
134196 result = exp .experiment_workspace .execute (run_env = env_to_use )
135197
198+ if result is None :
199+ raise CoderError ("No result is returned from the experiment workspace" )
200+
136201 exp .result = result
202+
137203 if RUNNER_SETTINGS .cache_result :
138204 self .dump_cache_result (exp , result )
139205
0 commit comments