Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions rdagent/components/coder/data_science/ensemble/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from rdagent.core.scenario import Scenario
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.tpl import T

from rdagent.utils.agent.ret import PythonAgentOut

class EnsembleMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
def implement_one_task(
Expand Down Expand Up @@ -76,6 +76,7 @@ def implement_one_task(
queried_former_failed_knowledge[0] if queried_former_failed_knowledge else None
),
all_code=workspace.all_codes,
out_spec=PythonAgentOut.get_spec()
)
user_prompt = T(".prompts:ensemble_coder.user").r(
ensemble_spec=workspace.file_dict["spec/ensemble.md"],
Expand All @@ -84,14 +85,12 @@ def implement_one_task(
)

for _ in range(5):
ensemble_code = json.loads(
ensemble_code = PythonAgentOut.extract_output(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)
)["code"]
)
if ensemble_code != workspace.file_dict.get("ensemble.py"):
break
else:
Expand Down
4 changes: 4 additions & 0 deletions rdagent/components/coder/data_science/ensemble/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,14 @@ ensemble_coder:
2. You should avoid using logging module to output information in your generated code, and instead use the print() function.

## Output Format
{% if out_spec %}
{{ out_spec }}
{% else %}
Please respond with the code in the following JSON format. Here is an example structure for the JSON output:
{
"code": "The Python code as a string."
}
{% endif %}

user: |-
--------- Ensemble Specification ---------
Expand Down
8 changes: 4 additions & 4 deletions rdagent/components/coder/data_science/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from rdagent.core.scenario import Scenario
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.tpl import T
from rdagent.utils.agent.ret import PythonAgentOut


class FeatureMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
Expand Down Expand Up @@ -61,6 +62,7 @@ def implement_one_task(
data_loader_code=workspace.file_dict.get("load_data.py"),
queried_similar_successful_knowledge=queried_similar_successful_knowledge,
queried_former_failed_knowledge=queried_former_failed_knowledge[0],
out_spec=PythonAgentOut.get_spec(),
)
user_prompt = T(".prompts:feature_coder.user").r(
feature_spec=workspace.file_dict["spec/feature.md"],
Expand All @@ -69,14 +71,12 @@ def implement_one_task(
)

for _ in range(5):
feature_code = json.loads(
feature_code = PythonAgentOut.extract_output(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)
)["code"]
)
if feature_code != workspace.file_dict.get("feature.py"):
break
else:
Expand Down
4 changes: 4 additions & 0 deletions rdagent/components/coder/data_science/feature/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,14 @@ feature_coder:
- You should avoid using logging module to output information in your generated code, and instead use the print() function.

## Output Format
{% if out_spec %}
{{ out_spec }}
{% else %}
Please respond with the code in the following JSON format. Here is an example structure for the JSON output:
{
"code": "The Python code as a string."
}
{% endif %}

user: |-
--------- Feature Processing Specification ---------
Expand Down
8 changes: 3 additions & 5 deletions rdagent/components/coder/data_science/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from rdagent.core.experiment import FBWorkspace
from rdagent.core.scenario import Scenario
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.ret import BatchEditOut
from rdagent.utils.agent.ret import PythonBatchEditOut
from rdagent.utils.agent.tpl import T


Expand Down Expand Up @@ -63,7 +63,7 @@ def implement_one_task(
feature_code=workspace.file_dict["feature.py"],
queried_similar_successful_knowledge=queried_similar_successful_knowledge,
queried_former_failed_knowledge=queried_former_failed_knowledge[0],
out_spec=BatchEditOut.get_spec(),
out_spec=PythonBatchEditOut.get_spec(),
)
# user_prompt = T(".prompts:model_coder.user").r(
# model_spec=workspace.file_dict["spec/model.md"],
Expand All @@ -80,12 +80,10 @@ def implement_one_task(
)

for _ in range(5):
batch_edit = BatchEditOut.extract_output(
batch_edit = PythonBatchEditOut.extract_output(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=BatchEditOut.json_mode,
json_target_type=Dict[str, str],
)
)

Expand Down
50 changes: 12 additions & 38 deletions rdagent/components/coder/data_science/raw_data_loader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from rdagent.core.experiment import FBWorkspace
from rdagent.core.scenario import Scenario
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.ret import PythonAgentOut
from rdagent.utils.agent.tpl import T


Expand Down Expand Up @@ -96,45 +97,19 @@ def implement_one_task(
competition_info=competition_info,
folder_spec=data_folder_info,
)
data_loader_prompt = T(".prompts:spec.user.data_loader").r(
latest_spec=workspace.file_dict.get("spec/data_loader.md")
)
data_loader_prompt = T(".prompts:spec.user.data_loader").r(latest_spec=workspace.file_dict.get("spec/data_loader.md"))
feature_prompt = T(".prompts:spec.user.feature").r(latest_spec=workspace.file_dict.get("spec/feature.md"))
model_prompt = T(".prompts:spec.user.model").r(latest_spec=workspace.file_dict.get("spec/model.md"))
ensemble_prompt = T(".prompts:spec.user.ensemble").r(
latest_spec=workspace.file_dict.get("spec/ensemble.md")
)
workflow_prompt = T(".prompts:spec.user.workflow").r(
latest_spec=workspace.file_dict.get("spec/workflow.md")
)
ensemble_prompt = T(".prompts:spec.user.ensemble").r(latest_spec=workspace.file_dict.get("spec/ensemble.md"))
workflow_prompt = T(".prompts:spec.user.workflow").r(latest_spec=workspace.file_dict.get("spec/workflow.md"))

spec_session = APIBackend().build_chat_session(session_system_prompt=system_prompt)

data_loader_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=data_loader_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
feature_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=feature_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
model_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=model_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
ensemble_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=ensemble_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
workflow_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=workflow_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
data_loader_spec = spec_session.build_chat_completion(user_prompt=data_loader_prompt)
feature_spec = spec_session.build_chat_completion(user_prompt=feature_prompt)
model_spec = spec_session.build_chat_completion(user_prompt=model_prompt)
ensemble_spec = spec_session.build_chat_completion(user_prompt=ensemble_prompt)
workflow_spec = spec_session.build_chat_completion(user_prompt=workflow_prompt)
else:
data_loader_spec = workspace.file_dict["spec/data_loader.md"]
feature_spec = workspace.file_dict["spec/feature.md"]
Expand All @@ -147,6 +122,7 @@ def implement_one_task(
task_desc=data_loader_task_info,
queried_similar_successful_knowledge=queried_similar_successful_knowledge,
queried_former_failed_knowledge=queried_former_failed_knowledge[0],
out_spec=PythonAgentOut.get_spec(),
)
user_prompt = T(".prompts:data_loader_coder.user").r(
competition_info=competition_info,
Expand All @@ -157,14 +133,12 @@ def implement_one_task(
)

for _ in range(5):
data_loader_code = json.loads(
data_loader_code = PythonAgentOut.extract_output(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)
)["code"]
)
if data_loader_code != workspace.file_dict.get("load_data.py"):
break
else:
Expand Down
Loading