microsoft
diff --git a/rdagent/app/data_science/conf.py b/rdagent/app/data_science/conf.py
Lines changed: 2 additions & 0 deletions
diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py
Lines changed: 1 addition & 1 deletion
diff --git a/rdagent/scenarios/data_science/dev/feedback.py b/rdagent/scenarios/data_science/dev/feedback.py
Lines changed: 2 additions & 0 deletions
diff --git a/rdagent/scenarios/data_science/dev/prompts.yaml b/rdagent/scenarios/data_science/dev/prompts.yaml
Lines changed: 4 additions & 6 deletions
@@ -24,5 +24,7 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     #### enable specification
     spec_enabled: bool = True
 
+    proposal_version: str = "v1"
+
 
 DS_RD_SETTING = DataScienceBasePropSetting()
@@ -80,6 +80,7 @@ def _create_chat_completion_inner_function(  # type: ignore[no-untyped-def] # no
         if json_mode and supports_response_schema(model=LITELLM_SETTINGS.chat_model):
             kwargs["response_format"] = {"type": "json_object"}
 
+        logger.info(self._build_log_messages(messages), tag="llm_messages")
         # Call LiteLLM completion
         response = completion(
             model=LITELLM_SETTINGS.chat_model,
@@ -93,7 +94,6 @@ def _create_chat_completion_inner_function(  # type: ignore[no-untyped-def] # no
             f"{LogColors.GREEN}Using chat model{LogColors.END} {LITELLM_SETTINGS.chat_model}", tag="llm_messages"
         )
 
-        logger.info(self._build_log_messages(messages), tag="llm_messages")
         if LITELLM_SETTINGS.chat_stream:
             logger.info(f"{LogColors.BLUE}assistant:{LogColors.END}", tag="llm_messages")
             content = ""
 
@@ -77,6 +77,8 @@ def generate_feedback(self, exp: DSExperiment, trace: DSTrace) -> ExperimentFeed
             )
         )
 
+        # Currently, we do not use `observations`, `hypothesis_evaluation`, and `new_hypothesis` in the framework.
+        # `new_hypothesis` should not exist in the feedback.
         return HypothesisFeedback(
             observations=resp_dict.get("Observations", "No observations provided"),
             hypothesis_evaluation=resp_dict.get("Feedback for Hypothesis", "No feedback provided"),
 
@@ -15,16 +15,14 @@ exp_feedback:
     Your feedback should:
     1. Confirm if the current result supports or refutes the hypothesis.
     2. Compare with previous best results.
-    3. Suggest improvements or new directions. Stay innovative and adaptive.
-    4. SOTA results are the best outcomes we have achieved in this scenario. If we do not have complete experiment available (i.e., results that are runnable and can generate evaluation outcomes), **please replace it as the best result/SOTA**.
+    3. SOTA results are the best outcomes we have achieved in this scenario. 
   
     Please provide detailed and constructive feedback.
     Example JSON Structure for Result Analysis:
     {
-      "Observations": "Your overall observations here",
-      "Feedback for Hypothesis": "Observations related to the hypothesis",
-      "New Hypothesis": "Your new hypothesis here",
-      "Reasoning": "Reasoning for the new hypothesis",
+      "Observations": "A detailed summary of the experimental results, including the description and scores for both SOTA and the current experiment. Limit this field to no more than three sentences, focusing on concrete data rather than general statements.",
+      "Feedback for Hypothesis": "A brief evaluation of the original hypothesis that highlights specific data points or trends which support or contradict it. Limit this field to two sentences.",
+      "Reasoning": "A clear explanation of why the current result performs better or worse than SOTA. This should reference the description and score of both SOTA and the current experiment, providing insight into the factors contributing to the observed differences. Limit this field to one to three sentences.",
       "Replace Best Result": "yes or no"
     }
Original file line number	Diff line number	Diff line change
`@@ -77,6 +77,8 @@ def generate_feedback(self, exp: DSExperiment, trace: DSTrace) -> ExperimentFeed`
`77`	`77`	`)`
`78`	`78`	`)`
`79`	`79`
	`80`	+ # Currently, we do not use `observations`, `hypothesis_evaluation`, and `new_hypothesis` in the framework.
	`81`	+ # `new_hypothesis` should not exist in the feedback.
`80`	`82`	`return HypothesisFeedback(`
`81`	`83`	`observations=resp_dict.get("Observations", "No observations provided"),`
`82`	`84`	`hypothesis_evaluation=resp_dict.get("Feedback for Hypothesis", "No feedback provided"),`