11import pickle
2+ from copy import deepcopy
23from datetime import datetime
34from pathlib import Path
45
1011 CoSTEERRAGStrategyV2 ,
1112)
1213from rdagent .core .developer import Developer
13- from rdagent .core .evaluation import Evaluator
14- from rdagent .core .evolving_agent import EvolvingStrategy , RAGEvoAgent
14+ from rdagent .core .evolving_agent import EvolvingStrategy , RAGEvaluator , RAGEvoAgent
1515from rdagent .core .exception import CoderError
1616from rdagent .core .experiment import Experiment
1717from rdagent .log import rdagent_logger as logger
@@ -22,15 +22,13 @@ class CoSTEER(Developer[Experiment]):
2222 def __init__ (
2323 self ,
2424 settings : CoSTEERSettings ,
25- eva : Evaluator ,
25+ eva : RAGEvaluator ,
2626 es : EvolvingStrategy ,
2727 * args ,
2828 evolving_version : int = 2 ,
2929 max_seconds : int | None = None ,
3030 with_knowledge : bool = True ,
31- with_feedback : bool = True ,
3231 knowledge_self_gen : bool = True ,
33- filter_final_evo : bool = True ,
3432 max_loop : int | None = None ,
3533 ** kwargs ,
3634 ) -> None :
@@ -47,9 +45,7 @@ def __init__(
4745 )
4846
4947 self .with_knowledge = with_knowledge
50- self .with_feedback = with_feedback
5148 self .knowledge_self_gen = knowledge_self_gen
52- self .filter_final_evo = filter_final_evo
5349 self .evolving_strategy = es
5450 self .evaluator = eva
5551 self .evolving_version = evolving_version
@@ -71,25 +67,37 @@ def __init__(
7167 )
7268 )
7369
def _get_last_fb(self) -> CoSTEERMultiFeedback:
    """Return the feedback stored on the latest entry of the evolving trace.

    Reads ``self.evolve_agent.evolving_trace[-1].feedback`` and narrows it
    to ``CoSTEERMultiFeedback`` before handing it back.

    Raises:
        AssertionError: if the feedback is ``None`` or is not a
            ``CoSTEERMultiFeedback`` instance.
    """
    latest_step = self.evolve_agent.evolving_trace[-1]
    feedback = latest_step.feedback
    # Two separate checks so a missing feedback reports its own message.
    assert feedback is not None, "feedback is None"
    assert isinstance(feedback, CoSTEERMultiFeedback), "feedback must be of type CoSTEERMultiFeedback"
    return feedback
75+
7476 def develop (self , exp : Experiment ) -> Experiment :
7577
7678 # init intermediate items
7779 evo_exp = EvolvingItem .from_experiment (exp )
7880
79- self .evolve_agent = RAGEvoAgent (
81+ self .evolve_agent = RAGEvoAgent [ EvolvingItem ] (
8082 max_loop = self .max_loop ,
8183 evolving_strategy = self .evolving_strategy ,
8284 rag = self .rag ,
8385 with_knowledge = self .with_knowledge ,
84- with_feedback = self . with_feedback ,
86+ with_feedback = True ,
8587 knowledge_self_gen = self .knowledge_self_gen ,
8688 enable_filelock = self .settings .enable_filelock ,
8789 filelock_path = self .settings .filelock_path ,
8890 )
8991
92+ # Evolving the solution
9093 start_datetime = datetime .now ()
94+ fallback_evo_exp = None
9195 for evo_exp in self .evolve_agent .multistep_evolve (evo_exp , self .evaluator ):
9296 assert isinstance (evo_exp , Experiment ) # multiple inheritance
97+ if self ._get_last_fb ().is_acceptable ():
98+ fallback_evo_exp = deepcopy (evo_exp )
99+ fallback_evo_exp .create_ws_ckp () # NOTE: creating checkpoints for saving files in the workspace to prevent inplace mutation.
100+
93101 logger .log_object (evo_exp .sub_workspace_list , tag = "evolving code" )
94102 for sw in evo_exp .sub_workspace_list :
95103 logger .info (f"evolving workspace: { sw } " )
@@ -100,8 +108,16 @@ def develop(self, exp: Experiment) -> Experiment:
100108 logger .info ("Global timer is timeout, stop evolving" )
101109 break
102110
103- if self .with_feedback and self .filter_final_evo :
104- evo_exp = self ._exp_postprocess_by_feedback (evo_exp , self .evolve_agent .evolving_trace [- 1 ].feedback )
111+ # if the final feedback is not finished(therefore acceptable), we will use the fallback solution.
112+ try :
113+ evo_exp = self ._exp_postprocess_by_feedback (evo_exp , self ._get_last_fb ())
114+ except CoderError :
115+ if fallback_evo_exp is not None :
116+ logger .info ("Fallback to the fallback solution." )
117+ evo_exp = fallback_evo_exp
118+ evo_exp .recover_ws_ckp () # NOTE: recovering checkpoints for restoring files in the workspace to prevent inplace mutation.
119+ else :
120+ raise
105121
106122 exp .sub_workspace_list = evo_exp .sub_workspace_list
107123 exp .experiment_workspace = evo_exp .experiment_workspace
0 commit comments