Skip to content

Commit 75eea22

Browse files
xisen-wXianBWTPLin22WinstonLiyt
authored
feat: supporting various Kaggle competitions & scenarios for RD-Agent (#409)
* Fixes on kaggle output * feat: add kaggle s3e14 template (#394) * add s3e14 template * fix CI * Initialisation of a template of competition * add kaggle s3e16 template (#396) * get kaggle competition scores (#397) * Adding a new competition s4e6 * feat: s4e5 (#400) * init for s4e5 * edit s4e5 * ci issue * feat: S4e3 (#402) * Initialisation of a template of competition * Adding a new competition s4e6 * Competition Initialised * Fixed to make sure that now it runs * Fixing for CI * correct evaluation (#403) * find rank in leaderboard (#405) * fix: model templates for KG scenario (#408) * fix feature selection for some models * feat select template * Updating the prompts for a more powerful model tuning * refine the prompt * fix: template error in s4e6 * feat: show simple execution time in demo (#410) * show time in kaggle demo * change color * fix a small bug * edit loop.py and proposal * delete useless files * CI issues * ci issue --------- Co-authored-by: XianBW <36835909+XianBW@users.noreply.github.com> Co-authored-by: Haoran Pan <167847254+TPLin22@users.noreply.github.com> Co-authored-by: Way2Learn <118058822+Xisen-Wang@users.noreply.github.com> Co-authored-by: WinstonLiyt <1957922024@qq.com> Co-authored-by: TPLin22 <tplin2@163.com>
1 parent 8f8afea commit 75eea22

55 files changed

Lines changed: 1162 additions & 246 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

rdagent/app/kaggle/loop.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,20 +35,16 @@ def __init__(self, PROP_SETTING: BasePropSetting):
3535
with logger.tag("init"):
3636
scen: Scenario = import_class(PROP_SETTING.scen)(PROP_SETTING.competition)
3737
logger.log_object(scen, tag="scenario")
38-
3938
knowledge_base = (
4039
import_class(PROP_SETTING.knowledge_base)(PROP_SETTING.knowledge_base_path, scen)
4140
if PROP_SETTING.knowledge_base != ""
4241
else None
4342
)
4443
logger.log_object(knowledge_base, tag="knowledge_base")
45-
4644
self.hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.hypothesis_gen)(scen)
4745
logger.log_object(self.hypothesis_gen, tag="hypothesis generator")
48-
4946
self.hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.hypothesis2experiment)()
5047
logger.log_object(self.hypothesis2experiment, tag="hypothesis2experiment")
51-
5248
self.feature_coder: Developer = import_class(PROP_SETTING.feature_coder)(scen)
5349
logger.log_object(self.feature_coder, tag="feature coder")
5450
self.model_feature_selection_coder: Developer = import_class(PROP_SETTING.model_feature_selection_coder)(
@@ -57,12 +53,10 @@ def __init__(self, PROP_SETTING: BasePropSetting):
5753
logger.log_object(self.model_feature_selection_coder, tag="model feature selection coder")
5854
self.model_coder: Developer = import_class(PROP_SETTING.model_coder)(scen)
5955
logger.log_object(self.model_coder, tag="model coder")
60-
6156
self.feature_runner: Developer = import_class(PROP_SETTING.feature_runner)(scen)
6257
logger.log_object(self.feature_runner, tag="feature runner")
6358
self.model_runner: Developer = import_class(PROP_SETTING.model_runner)(scen)
6459
logger.log_object(self.model_runner, tag="model runner")
65-
6660
self.summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.summarizer)(scen)
6761
logger.log_object(self.summarizer, tag="summarizer")
6862
self.trace = KGTrace(scen=scen, knowledge_base=knowledge_base)
@@ -88,7 +82,6 @@ def running(self, prev_out: dict[str, Any]):
8882
else:
8983
exp = self.model_runner.develop(prev_out["coding"])
9084
logger.log_object(exp, tag="runner result")
91-
9285
if KAGGLE_IMPLEMENT_SETTING.competition in [
9386
"optiver-realized-volatility-prediction",
9487
"covid19-global-forecasting-week-1",
@@ -99,7 +92,6 @@ def running(self, prev_out: dict[str, Any]):
9992
)
10093
except Exception as e:
10194
logger.error(f"Merge python files to one file failed: {e}")
102-
10395
if KAGGLE_IMPLEMENT_SETTING.auto_submit:
10496
csv_path = exp.experiment_workspace.workspace_path / "submission.csv"
10597
try:
@@ -129,21 +121,16 @@ def running(self, prev_out: dict[str, Any]):
129121
def main(path=None, step_n=None, competition=None):
130122
"""
131123
Auto R&D Evolving loop for models in a kaggle{} scenario.
132-
133124
You can continue running session by
134-
135125
.. code-block:: bash
136-
137126
dotenv run -- python rdagent/app/kaggle/loop.py [--competition titanic] $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is a optional parameter
138127
rdagent kaggle --competition playground-series-s4e8 # You are encouraged to use this one.
139-
140128
"""
141129
if competition:
142130
KAGGLE_IMPLEMENT_SETTING.competition = competition
143131
download_data(competition=competition, local_path=KAGGLE_IMPLEMENT_SETTING.local_data_path)
144132
else:
145133
logger.error("Please specify competition name.")
146-
147134
if path is None:
148135
kaggle_loop = KaggleRDLoop(KAGGLE_IMPLEMENT_SETTING)
149136
else:

rdagent/log/ui/app.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@
8888
if "lround" not in state:
8989
state.lround = 0 # RD Loop Round
9090

91+
if "times" not in state:
92+
state.times = defaultdict(lambda: defaultdict(list))
93+
9194
if "erounds" not in state:
9295
state.erounds = defaultdict(int) # Evolving Rounds in each RD Loop
9396

@@ -186,6 +189,17 @@ def get_msgs_until(end_func: Callable[[Message], bool] = lambda _: True):
186189
)
187190

188191
state.msgs[state.lround][msg.tag].append(msg)
192+
193+
# Update Times
194+
if "init" in tags:
195+
state.times[state.lround]["init"].append(msg.timestamp)
196+
if "r" in tags:
197+
state.times[state.lround]["r"].append(msg.timestamp)
198+
if "d" in tags:
199+
state.times[state.lround]["d"].append(msg.timestamp)
200+
if "ef" in tags:
201+
state.times[state.lround]["ef"].append(msg.timestamp)
202+
189203
# Stop Getting Logs
190204
if end_func(msg):
191205
break
@@ -224,6 +238,7 @@ def refresh(same_trace: bool = False):
224238
state.last_msg = None
225239
state.current_tags = []
226240
state.alpha158_metrics = None
241+
state.times = defaultdict(lambda: defaultdict(list))
227242

228243

229244
def evolving_feedback_window(wsf: FactorSingleFeedback | ModelCoderFeedback):
@@ -741,6 +756,18 @@ def evolving_window():
741756
st.markdown(state.scenario.rich_style_description + css, unsafe_allow_html=True)
742757

743758

759+
def show_times(round: int):
    """
    Render the elapsed time of every recorded stage of one loop round.

    For each tag stored under ``state.times[round]`` the elapsed time is the
    span between the first and the last recorded timestamp; it is written to
    the page as whole minutes plus remaining seconds.

    Parameters
    ----------
    round : int
        Key of the loop round inside ``state.times``. (The name shadows the
        builtin ``round`` within this function; it is kept for interface
        compatibility.)
    """
    for k, v in state.times[round].items():
        # With a single timestamp v[-1] is v[0], so the span is zero.
        diff = v[-1] - v[0]
        # `timedelta.seconds` only holds the sub-day remainder; use
        # total_seconds() so spans of a day or more are not wrapped around.
        total_seconds = int(diff.total_seconds())
        minutes, seconds = divmod(total_seconds, 60)
        st.markdown(f"**:blue[{k}]**: :red[**{minutes}**] minutes :orange[**{seconds}**] seconds")
769+
770+
744771
if state.scenario is not None:
745772
summary_window()
746773

@@ -754,8 +781,12 @@ def evolving_window():
754781
round = st.radio("**Loops**", horizontal=True, options=r_options, index=state.lround - 1)
755782
else:
756783
round = 1
784+
785+
show_times(round)
757786
rf_c, d_c = st.columns([2, 2])
758787
elif isinstance(state.scenario, GeneralModelScenario):
788+
show_times(round)
789+
759790
rf_c = st.container()
760791
d_c = st.container()
761792
round = 1
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import pandas as pd
2+
3+
4+
def select(X: pd.DataFrame) -> pd.DataFrame:
    """
    Select relevant features. To be used in fit & predict function.

    Every feature is currently kept. The only transformation is flattening
    multi-level column labels into single strings joined by "_"
    (e.g. ``("a", "b")`` becomes ``"a_b"``).

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix with single-level or multi-level columns.

    Returns
    -------
    pd.DataFrame
        ``X`` itself when its columns are single-level; otherwise a new frame
        with flattened column labels. The caller's frame is left unmodified.
    """
    # For now, we assume all features are relevant. This can be expanded to feature selection logic.
    if X.columns.nlevels == 1:
        return X
    flat_names = ["_".join(str(level) for level in col).strip() for col in X.columns.values]
    # Relabel a shallow copy so the caller's MultiIndex columns stay intact;
    # deep=False avoids duplicating the underlying data.
    selected = X.copy(deep=False)
    selected.columns = flat_names
    return selected
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import pandas as pd
2+
3+
4+
def select(X: pd.DataFrame) -> pd.DataFrame:
    """
    Select relevant features. To be used in fit & predict function.

    Every feature is currently kept. The only transformation is flattening
    multi-level column labels into single strings joined by "_"
    (e.g. ``("a", "b")`` becomes ``"a_b"``).

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix with single-level or multi-level columns.

    Returns
    -------
    pd.DataFrame
        ``X`` itself when its columns are single-level; otherwise a new frame
        with flattened column labels. The caller's frame is left unmodified.
    """
    # For now, we assume all features are relevant. This can be expanded to feature selection logic.
    if X.columns.nlevels == 1:
        return X
    flat_names = ["_".join(str(level) for level in col).strip() for col in X.columns.values]
    # Relabel a shallow copy so the caller's MultiIndex columns stay intact;
    # deep=False avoids duplicating the underlying data.
    selected = X.copy(deep=False)
    selected.columns = flat_names
    return selected

rdagent/scenarios/kaggle/experiment/digit-recognizer_template/train.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,14 @@ def import_module_from_path(module_name, module_path):
7575
metrics_all.append(accuracy)
7676

7777
# 5) Save the validation accuracy
78-
min_index = np.argmax(metrics_all)
79-
pd.Series(data=[metrics_all[min_index]], index=["multi-class accuracy"]).to_csv("submission_score.csv")
78+
max_index = np.argmax(metrics_all)
79+
pd.Series(data=[metrics_all[max_index]], index=["multi-class accuracy"]).to_csv("submission_score.csv")
8080

8181
# 6) Submit predictions for the test
8282
ids = range(1, len(X_test) + 1)
8383

8484
# TODO: fix selection
8585
print(X_valid_selected.columns)
86-
y_test_pred = model_l[min_index][1](model_l[min_index][0], model_l[min_index][2].select(X_test)).flatten()
86+
y_test_pred = model_l[max_index][1](model_l[max_index][0], model_l[max_index][2].select(X_test)).flatten()
8787
submission_result = pd.DataFrame({"ImageId": ids, "Label": y_test_pred})
8888
submission_result.to_csv("submission.csv", index=False)

rdagent/scenarios/kaggle/experiment/forest-cover-type-prediction_template/model/model_nn.py

Lines changed: 0 additions & 78 deletions
This file was deleted.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import pandas as pd
2+
3+
4+
def select(X: pd.DataFrame) -> pd.DataFrame:
    """
    Select relevant features. To be used in fit & predict function.

    Every feature is currently kept. The only transformation is flattening
    multi-level column labels into single strings joined by "_"
    (e.g. ``("a", "b")`` becomes ``"a_b"``).

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix with single-level or multi-level columns.

    Returns
    -------
    pd.DataFrame
        ``X`` itself when its columns are single-level; otherwise a new frame
        with flattened column labels. The caller's frame is left unmodified.
    """
    # For now, we assume all features are relevant. This can be expanded to feature selection logic.
    if X.columns.nlevels == 1:
        return X
    flat_names = ["_".join(str(level) for level in col).strip() for col in X.columns.values]
    # Relabel a shallow copy so the caller's MultiIndex columns stay intact;
    # deep=False avoids duplicating the underlying data.
    selected = X.copy(deep=False)
    selected.columns = flat_names
    return selected

rdagent/scenarios/kaggle/experiment/forest-cover-type-prediction_template/train.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,12 @@ def import_module_from_path(module_name, module_path):
7676
metrics_all.append(accuracy)
7777

7878
# 5) Save the validation accuracy
79-
min_index = np.argmax(metrics_all)
80-
pd.Series(data=[metrics_all[min_index]], index=["multi-class accuracy"]).to_csv("submission_score.csv")
79+
max_index = np.argmax(metrics_all)
80+
pd.Series(data=[metrics_all[max_index]], index=["multi-class accuracy"]).to_csv("submission_score.csv")
8181

8282
# 6) Make predictions on the test set and save them
83-
X_test_selected = model_l[min_index][2].select(X_test.copy())
84-
y_test_pred = model_l[min_index][1](model_l[min_index][0], X_test_selected).flatten() + 1
83+
X_test_selected = model_l[max_index][2].select(X_test.copy())
84+
y_test_pred = model_l[max_index][1](model_l[max_index][0], X_test_selected).flatten() + 1
8585

8686

8787
# 7) Submit predictions for the test set
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import pandas as pd
2+
3+
4+
def select(X: pd.DataFrame) -> pd.DataFrame:
    """
    Select relevant features. To be used in fit & predict function.

    Every feature is currently kept. The only transformation is flattening
    multi-level column labels into single strings joined by "_"
    (e.g. ``("a", "b")`` becomes ``"a_b"``).

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix with single-level or multi-level columns.

    Returns
    -------
    pd.DataFrame
        ``X`` itself when its columns are single-level; otherwise a new frame
        with flattened column labels. The caller's frame is left unmodified.
    """
    # For now, we assume all features are relevant. This can be expanded to feature selection logic.
    if X.columns.nlevels == 1:
        return X
    flat_names = ["_".join(str(level) for level in col).strip() for col in X.columns.values]
    # Relabel a shallow copy so the caller's MultiIndex columns stay intact;
    # deep=False avoids duplicating the underlying data.
    selected = X.copy(deep=False)
    selected.columns = flat_names
    return selected
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import pandas as pd
2+
3+
4+
def select(X: pd.DataFrame) -> pd.DataFrame:
    """
    Select relevant features. To be used in fit & predict function.

    Every feature is currently kept. The only transformation is flattening
    multi-level column labels into single strings joined by "_"
    (e.g. ``("a", "b")`` becomes ``"a_b"``).

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix with single-level or multi-level columns.

    Returns
    -------
    pd.DataFrame
        ``X`` itself when its columns are single-level; otherwise a new frame
        with flattened column labels. The caller's frame is left unmodified.
    """
    # For now, we assume all features are relevant. This can be expanded to feature selection logic.
    if X.columns.nlevels == 1:
        return X
    flat_names = ["_".join(str(level) for level in col).strip() for col in X.columns.values]
    # Relabel a shallow copy so the caller's MultiIndex columns stay intact;
    # deep=False avoids duplicating the underlying data.
    selected = X.copy(deep=False)
    selected.columns = flat_names
    return selected

0 commit comments

Comments
 (0)