Skip to content

Commit 3ce2bd4

Browse files
authored
feat: fallback to acceptable results (#1129)
* refactor: add is_acceptable, fallback logic and generify evolving agent
* refine lint
* small
* lint
* lint
* lint
* feat: add is_acceptable to CoSTEERMultiFeedback
* feat: add in-memory workspace checkpoint and recovery
* feat: preserve symbolic links in workspace checkpoints and recovery
* lint
* lint
* feat: limit workspace checkpoint to files under 100KB
* feat: add workspace checkpoint size limit setting
* prompt
* lint
1 parent 2307237 commit 3ce2bd4

12 files changed

Lines changed: 260 additions & 56 deletions

File tree

rdagent/components/coder/CoSTEER/__init__.py

Lines changed: 27 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import pickle
2+
from copy import deepcopy
23
from datetime import datetime
34
from pathlib import Path
45

@@ -10,8 +11,7 @@
1011
CoSTEERRAGStrategyV2,
1112
)
1213
from rdagent.core.developer import Developer
13-
from rdagent.core.evaluation import Evaluator
14-
from rdagent.core.evolving_agent import EvolvingStrategy, RAGEvoAgent
14+
from rdagent.core.evolving_agent import EvolvingStrategy, RAGEvaluator, RAGEvoAgent
1515
from rdagent.core.exception import CoderError
1616
from rdagent.core.experiment import Experiment
1717
from rdagent.log import rdagent_logger as logger
@@ -22,15 +22,13 @@ class CoSTEER(Developer[Experiment]):
2222
def __init__(
2323
self,
2424
settings: CoSTEERSettings,
25-
eva: Evaluator,
25+
eva: RAGEvaluator,
2626
es: EvolvingStrategy,
2727
*args,
2828
evolving_version: int = 2,
2929
max_seconds: int | None = None,
3030
with_knowledge: bool = True,
31-
with_feedback: bool = True,
3231
knowledge_self_gen: bool = True,
33-
filter_final_evo: bool = True,
3432
max_loop: int | None = None,
3533
**kwargs,
3634
) -> None:
@@ -47,9 +45,7 @@ def __init__(
4745
)
4846

4947
self.with_knowledge = with_knowledge
50-
self.with_feedback = with_feedback
5148
self.knowledge_self_gen = knowledge_self_gen
52-
self.filter_final_evo = filter_final_evo
5349
self.evolving_strategy = es
5450
self.evaluator = eva
5551
self.evolving_version = evolving_version
@@ -71,25 +67,37 @@ def __init__(
7167
)
7268
)
7369

70+
def _get_last_fb(self) -> CoSTEERMultiFeedback:
71+
fb = self.evolve_agent.evolving_trace[-1].feedback
72+
assert fb is not None, "feedback is None"
73+
assert isinstance(fb, CoSTEERMultiFeedback), "feedback must be of type CoSTEERMultiFeedback"
74+
return fb
75+
7476
def develop(self, exp: Experiment) -> Experiment:
7577

7678
# init intermediate items
7779
evo_exp = EvolvingItem.from_experiment(exp)
7880

79-
self.evolve_agent = RAGEvoAgent(
81+
self.evolve_agent = RAGEvoAgent[EvolvingItem](
8082
max_loop=self.max_loop,
8183
evolving_strategy=self.evolving_strategy,
8284
rag=self.rag,
8385
with_knowledge=self.with_knowledge,
84-
with_feedback=self.with_feedback,
86+
with_feedback=True,
8587
knowledge_self_gen=self.knowledge_self_gen,
8688
enable_filelock=self.settings.enable_filelock,
8789
filelock_path=self.settings.filelock_path,
8890
)
8991

92+
# Evolving the solution
9093
start_datetime = datetime.now()
94+
fallback_evo_exp = None
9195
for evo_exp in self.evolve_agent.multistep_evolve(evo_exp, self.evaluator):
9296
assert isinstance(evo_exp, Experiment) # multiple inheritance
97+
if self._get_last_fb().is_acceptable():
98+
fallback_evo_exp = deepcopy(evo_exp)
99+
fallback_evo_exp.create_ws_ckp() # NOTE: creating checkpoints for saving files in the workspace to prevent inplace mutation.
100+
93101
logger.log_object(evo_exp.sub_workspace_list, tag="evolving code")
94102
for sw in evo_exp.sub_workspace_list:
95103
logger.info(f"evolving workspace: {sw}")
@@ -100,8 +108,16 @@ def develop(self, exp: Experiment) -> Experiment:
100108
logger.info("Global timer is timeout, stop evolving")
101109
break
102110

103-
if self.with_feedback and self.filter_final_evo:
104-
evo_exp = self._exp_postprocess_by_feedback(evo_exp, self.evolve_agent.evolving_trace[-1].feedback)
111+
# if the final feedback is not finished(therefore acceptable), we will use the fallback solution.
112+
try:
113+
evo_exp = self._exp_postprocess_by_feedback(evo_exp, self._get_last_fb())
114+
except CoderError:
115+
if fallback_evo_exp is not None:
116+
logger.info("Fallback to the fallback solution.")
117+
evo_exp = fallback_evo_exp
118+
evo_exp.recover_ws_ckp() # NOTE: recovering checkpoints for restoring files in the workspace to prevent inplace mutation.
119+
else:
120+
raise
105121

106122
exp.sub_workspace_list = evo_exp.sub_workspace_list
107123
exp.experiment_workspace = evo_exp.experiment_workspace

rdagent/components/coder/CoSTEER/evaluators.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,9 @@ def append(self, feedback: CoSTEERSingleFeedback) -> None:
181181
def __iter__(self):
182182
return iter(self.feedback_list)
183183

184+
def is_acceptable(self) -> bool:
185+
return all(feedback.is_acceptable() for feedback in self.feedback_list)
186+
184187
def finished(self) -> bool:
185188
"""
186189
In some implementations, tasks may fail multiple times, leading agents to skip the implementation.

rdagent/components/coder/CoSTEER/evolvable_subjects.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def __init__(
2525
self.sub_gt_implementations = sub_gt_implementations
2626

2727
@classmethod
28-
def from_experiment(cls, exp: Experiment) -> Experiment:
28+
def from_experiment(cls, exp: Experiment) -> "EvolvingItem":
2929
ei = cls(sub_tasks=exp.sub_tasks)
3030
ei.based_experiments = exp.based_experiments
3131
ei.experiment_workspace = exp.experiment_workspace

rdagent/core/conf.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,11 @@ class RDAgentSettings(ExtendedBaseSettings):
5555

5656
# workspace conf
5757
workspace_path: Path = Path.cwd() / "git_ignore_folder" / "RD-Agent_workspace"
58+
workspace_ckp_size_limit: int = 0
59+
"""
60+
the checkpoint for the workspace is a zip file.
61+
0 (or any value <=0) means *no* size limit for files in workspace checkpoints
62+
"""
5863

5964
# multi processing conf
6065
multi_proc_n: int = 1

rdagent/core/evaluation.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,13 @@ class Feedback:
1212
The building process of feedback will should be in evaluator
1313
"""
1414

15+
def is_acceptable(self) -> bool:
16+
"""
17+
Sometimes, the solution is already acceptable, but we still want to refine it.
18+
So we use different logic to determine whether the solution is acceptable or finished.
19+
"""
20+
return self.__bool__()
21+
1522
def finished(self) -> bool:
1623
"""
1724
In some implementations, tasks may fail multiple times, leading agents to skip the implementation.

rdagent/core/evolving_agent.py

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,24 +2,21 @@
22

33
from abc import ABC, abstractmethod
44
from collections.abc import Generator
5-
from typing import TYPE_CHECKING, Any, Generic, TypeVar
5+
from contextlib import nullcontext
6+
from typing import Any, Generic, TypeVar
67

78
from filelock import FileLock
89
from tqdm import tqdm
910

10-
if TYPE_CHECKING:
11-
from rdagent.core.evolving_framework import EvolvableSubjects
12-
13-
from contextlib import nullcontext
14-
1511
from rdagent.core.evaluation import EvaluableObj, Evaluator, Feedback
16-
from rdagent.core.evolving_framework import EvolvingStrategy, EvoStep
12+
from rdagent.core.evolving_framework import EvolvableSubjects, EvolvingStrategy, EvoStep
1713
from rdagent.log import rdagent_logger as logger
1814

1915
ASpecificEvaluator = TypeVar("ASpecificEvaluator", bound=Evaluator)
16+
ASpecificEvolvableSubjects = TypeVar("ASpecificEvolvableSubjects", bound=EvolvableSubjects)
2017

2118

22-
class EvoAgent(ABC, Generic[ASpecificEvaluator]):
19+
class EvoAgent(ABC, Generic[ASpecificEvaluator, ASpecificEvolvableSubjects]):
2320

2421
def __init__(self, max_loop: int, evolving_strategy: EvolvingStrategy) -> None:
2522
self.max_loop = max_loop
@@ -28,9 +25,9 @@ def __init__(self, max_loop: int, evolving_strategy: EvolvingStrategy) -> None:
2825
@abstractmethod
2926
def multistep_evolve(
3027
self,
31-
evo: EvolvableSubjects,
28+
evo: ASpecificEvolvableSubjects,
3229
eva: ASpecificEvaluator | Feedback,
33-
) -> Generator[EvolvableSubjects, None, None]:
30+
) -> Generator[ASpecificEvolvableSubjects, None, None]:
3431
"""
3532
yield EvolvableSubjects for caller for easier process control and logging.
3633
"""
@@ -47,7 +44,7 @@ def evaluate(
4744
raise NotImplementedError
4845

4946

50-
class RAGEvoAgent(EvoAgent[RAGEvaluator]):
47+
class RAGEvoAgent(EvoAgent[RAGEvaluator, ASpecificEvolvableSubjects], Generic[ASpecificEvolvableSubjects]):
5148

5249
def __init__(
5350
self,
@@ -63,7 +60,7 @@ def __init__(
6360
) -> None:
6461
super().__init__(max_loop, evolving_strategy)
6562
self.rag = rag
66-
self.evolving_trace: list[EvoStep] = []
63+
self.evolving_trace: list[EvoStep[ASpecificEvolvableSubjects]] = []
6764
self.with_knowledge = with_knowledge
6865
self.with_feedback = with_feedback
6966
self.knowledge_self_gen = knowledge_self_gen
@@ -72,9 +69,9 @@ def __init__(
7269

7370
def multistep_evolve(
7471
self,
75-
evo: EvolvableSubjects,
72+
evo: ASpecificEvolvableSubjects,
7673
eva: RAGEvaluator | Feedback,
77-
) -> Generator[EvolvableSubjects, None, None]:
74+
) -> Generator[ASpecificEvolvableSubjects, None, None]:
7875
for evo_loop_id in tqdm(range(self.max_loop), "Implementing"):
7976
with logger.tag(f"evo_loop_{evo_loop_id}"):
8077
# 1. RAG
@@ -91,7 +88,7 @@ def multistep_evolve(
9188
)
9289

9390
# 3. Pack evolve results
94-
es = EvoStep(evo, queried_knowledge)
91+
es = EvoStep[ASpecificEvolvableSubjects](evo, queried_knowledge)
9592

9693
# 4. Evaluation
9794
if self.with_feedback:

rdagent/core/evolving_framework.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import copy
44
from abc import ABC, abstractmethod
55
from dataclasses import dataclass
6-
from typing import TYPE_CHECKING, Any
6+
from typing import TYPE_CHECKING, Any, Generic, TypeVar
77

88
from rdagent.core.evaluation import EvaluableObj
99
from rdagent.core.knowledge_base import KnowledgeBase
@@ -36,8 +36,11 @@ def clone(self) -> EvolvableSubjects:
3636
return copy.deepcopy(self)
3737

3838

39+
ASpecificEvolvableSubjects = TypeVar("ASpecificEvolvableSubjects", bound=EvolvableSubjects)
40+
41+
3942
@dataclass
40-
class EvoStep:
43+
class EvoStep(Generic[ASpecificEvolvableSubjects]):
4144
"""At a specific step,
4245
based on
4346
- previous trace
@@ -48,23 +51,24 @@ class EvoStep:
4851
(optional) After evaluation, we get feedback `feedback`.
4952
"""
5053

51-
evolvable_subjects: EvolvableSubjects
54+
evolvable_subjects: ASpecificEvolvableSubjects
55+
5256
queried_knowledge: QueriedKnowledge | None = None
5357
feedback: Feedback | None = None
5458

5559

56-
class EvolvingStrategy(ABC):
60+
class EvolvingStrategy(ABC, Generic[ASpecificEvolvableSubjects]):
5761
def __init__(self, scen: Scenario) -> None:
5862
self.scen = scen
5963

6064
@abstractmethod
6165
def evolve(
6266
self,
63-
*evo: EvolvableSubjects,
64-
evolving_trace: list[EvoStep] | None = None,
67+
*evo: ASpecificEvolvableSubjects,
68+
evolving_trace: list[EvoStep[ASpecificEvolvableSubjects]] | None = None,
6569
queried_knowledge: QueriedKnowledge | None = None,
6670
**kwargs: Any,
67-
) -> EvolvableSubjects:
71+
) -> ASpecificEvolvableSubjects:
6872
"""The evolving trace is a list of (evolvable_subjects, feedback) ordered
6973
according to the time.
7074
@@ -74,7 +78,7 @@ def evolve(
7478
"""
7579

7680

77-
class RAGStrategy(ABC):
81+
class RAGStrategy(ABC, Generic[ASpecificEvolvableSubjects]):
7882
"""Retrieval Augmentation Generation Strategy"""
7983

8084
def __init__(self, *args: Any, **kwargs: Any) -> None:
@@ -91,7 +95,7 @@ def load_or_init_knowledge_base(
9195
@abstractmethod
9296
def query(
9397
self,
94-
evo: EvolvableSubjects,
98+
evo: ASpecificEvolvableSubjects,
9599
evolving_trace: list[EvoStep],
96100
**kwargs: Any,
97101
) -> QueriedKnowledge | None:
@@ -100,7 +104,7 @@ def query(
100104
@abstractmethod
101105
def generate_knowledge(
102106
self,
103-
evolving_trace: list[EvoStep],
107+
evolving_trace: list[EvoStep[ASpecificEvolvableSubjects]],
104108
*,
105109
return_knowledge: bool = False,
106110
**kwargs: Any,

0 commit comments

Comments
 (0)