Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,11 @@ constraints: deepclean

# Check lint with black.
black:
$(PIPRUN) python -m black --check --diff . --extend-exclude test/scripts --extend-exclude git_ignore_folder -l 120
$(PIPRUN) python -m black --check --diff . --extend-exclude "(test/scripts|test/notebook/testfiles|git_ignore_folder)" -l 120

# Check lint with isort.
isort:
$(PIPRUN) python -m isort --check . -s git_ignore_folder -s test/scripts
$(PIPRUN) python -m isort --check . -s git_ignore_folder -s test/scripts -s test/notebook/testfiles

# Check lint with mypy.
# First deal with the core folder, and then gradually increase the scope of detection,
Expand Down Expand Up @@ -119,11 +119,11 @@ pre-commit:

# Auto lint with black.
auto-black:
$(PIPRUN) python -m black . --extend-exclude test/scripts --extend-exclude git_ignore_folder --extend-exclude .venv -l 120
$(PIPRUN) python -m black . --extend-exclude "(test/scripts|test/notebook/testfiles|git_ignore_folder|.venv)" -l 120

# Auto lint with isort.
auto-isort:
$(PIPRUN) python -m isort . -s git_ignore_folder -s test/scripts -s .venv
$(PIPRUN) python -m isort . -s git_ignore_folder -s test/scripts -s test/notebook/testfiles -s .venv

# Auto lint with toml-sort.
auto-toml-sort:
Expand Down
3 changes: 3 additions & 0 deletions rdagent/app/data_science/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):

### specific feature

### notebook integration
enable_notebook_conversion: bool = False

#### enable specification
spec_enabled: bool = True

Expand Down
5 changes: 4 additions & 1 deletion rdagent/components/coder/data_science/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ def implement_one_task(
package_info=target_task.package_info,
enable_model_dump=DS_RD_SETTING.enable_model_dump,
enable_debug_mode=DS_RD_SETTING.sample_data_by_LLM,
spec=T("scenarios.data_science.share:component_spec.Pipeline").r(metric_name=self.scen.metric_name),
spec=T("scenarios.data_science.share:component_spec.Pipeline").r(
metric_name=self.scen.metric_name,
enable_notebook_conversion=DS_RD_SETTING.enable_notebook_conversion,
),
)
user_prompt = T(".prompts:pipeline_coder.user").r(
competition_info=competition_info,
Expand Down
27 changes: 26 additions & 1 deletion rdagent/components/coder/data_science/pipeline/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
CoSTEERQueriedKnowledgeV2,
)
from rdagent.components.coder.data_science.conf import get_clear_ws_cmd, get_ds_env
from rdagent.components.coder.data_science.share.notebook import NotebookConverter
from rdagent.components.coder.data_science.utils import remove_eda_part
from rdagent.core.experiment import FBWorkspace, Task
from rdagent.scenarios.data_science.test_eval import get_test_eval
Expand Down Expand Up @@ -70,6 +71,24 @@ def evaluate(
env=env, entry=f"strace -e trace=file -f -o trace.log python -m coverage run main.py"
)

nb_conversion_ret_code = 0
nb_conversion_check_text = ""
if DS_RD_SETTING.enable_notebook_conversion:
notebook_converter = NotebookConverter()
code = implementation.file_dict["main.py"]
error_msg = notebook_converter.validate_code_format(code)
if error_msg is not None:
nb_conversion_check_text = error_msg
nb_conversion_ret_code = 1
else:
notebook_converter.convert(
task=target_task,
code=code,
stdout=result.stdout,
outfile=implementation.workspace_path / "main.ipynb",
use_debug_flag=DS_RD_SETTING.sample_data_by_LLM,
)

sample_submission_check = True
test_eval = get_test_eval()
if (sample_submission_file_name := test_eval.get_sample_submission_name(self.scen.competition)) is not None:
Expand Down Expand Up @@ -173,7 +192,10 @@ def evaluate(
scenario=self.scen.get_scenario_all_desc(eda_output=eda_output),
task_desc=target_task.get_task_information(),
stdout=stdout.strip(),
spec=T("scenarios.data_science.share:component_spec.Pipeline").r(metric_name=self.scen.metric_name),
spec=T("scenarios.data_science.share:component_spec.Pipeline").r(
metric_name=self.scen.metric_name,
enable_notebook_conversion=DS_RD_SETTING.enable_notebook_conversion,
),
code=implementation.file_dict["main.py"],
)
wfb = build_cls_from_json_with_retry(
Expand All @@ -193,4 +215,7 @@ def evaluate(
wfb.return_checking += (
"\nSample submission file check failed. Code should not open the sample submission file."
)
if nb_conversion_ret_code != 0 and wfb.final_decision is True:
wfb.final_decision = False
wfb.return_checking += "\n" + nb_conversion_check_text
return wfb
135 changes: 135 additions & 0 deletions rdagent/components/coder/data_science/share/notebook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""
Handles conversion from a Python file to a Jupyter notebook.
"""

import argparse
from pathlib import Path
from typing import Optional

import nbformat

from rdagent.components.coder.data_science.share.util import (
    extract_first_section_name_from_code,
    extract_function_body,
    split_code_and_output_into_sections,
)
from rdagent.core.experiment import Task
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.ret import MarkdownAgentOut
from rdagent.utils.agent.tpl import T


class NotebookConverter:
    """
    Builder responsible for writing a Jupyter notebook for a workspace.

    The input script is expected to define a ``main`` function whose body is
    divided by ``print("Section: <name>")`` statements; each such section is
    turned into a markdown cell (name + comments) followed by a code cell
    carrying the matching slice of captured stdout as stream output.
    """

    def validate_code_format(self, code: str) -> str | None:
        """
        Check that ``code`` has the structure required for conversion.

        :param code: Full Python source of the script to be converted.
        :return: None if the code format is valid, otherwise an error message
            describing what is missing.
        """
        # The conversion relies on a `main` function body split into sections.
        main_function_body = extract_function_body(code, "main")
        if not main_function_body:
            return "[Error] No main function found in the code. Please ensure that the main function is defined and contains the necessary print statements to divide sections."

        found_section_name = extract_first_section_name_from_code(main_function_body)
        if not found_section_name:
            return "[Error] No sections found in the code. Expected to see 'print(\"Section: <section name>\")' as section dividers. Also make sure that they are actually run and not just comments."

        return None

    def convert(
        self,
        task: Optional[Task],
        code: str,
        stdout: str,
        outfile: Optional[str | Path] = None,
        use_debug_flag: bool = False,
    ) -> str:
        """
        Convert ``code`` and its captured ``stdout`` into a Jupyter notebook.

        :param task: When given, an LLM-generated markdown intro cell
            summarizing the task/solution is prepended to the notebook.
        :param code: Python source of the script to convert.
        :param stdout: Captured standard output of the script run, interleaved
            into the matching code cells.
        :param outfile: Optional path (str or Path — the visible caller in
            eval.py passes a Path) to also write the notebook to.
        :param use_debug_flag: When True, ``sys.argv`` is patched to include
            ``--debug`` so argparse-based scripts run in debug mode.
        :return: The notebook serialized as a string.
        """
        # Handle argparse in the code to ensure it works in a notebook environment
        should_handle_argparse = "argparse" in code
        sections = split_code_and_output_into_sections(code=code, stdout=stdout)
        notebook = nbformat.v4.new_notebook()

        # Use LLM to generate an intro cell for the notebook
        if task:
            system_prompt = T(".prompts:notebookconverter.system").r()
            user_prompt = T(".prompts:notebookconverter.user").r(
                plan=task.get_task_information(),
                code=code,
            )
            resp = APIBackend().build_messages_and_create_chat_completion(
                user_prompt=user_prompt, system_prompt=system_prompt
            )
            intro_content = MarkdownAgentOut.extract_output(resp)
            notebook.cells.append(nbformat.v4.new_markdown_cell(intro_content))

        # Guard on `sections`: the CLI entry point does not run
        # validate_code_format first, so the list may be empty and indexing
        # sections[0] would raise IndexError.
        if should_handle_argparse and sections:
            # Remove extra `import sys` since it will be added for argparse handling
            if "import sys\n" in sections[0]["code"]:
                sections[0]["code"] = sections[0]["code"].replace("import sys\n", "")

            # Add sys.argv modification so argparse does not see Jupyter's own argv
            sections[0]["code"] = (
                "\n".join(
                    [
                        "import sys",
                        "# hack to allow argparse to work in notebook",
                        ('sys.argv = ["main.py", "--debug"]' if use_debug_flag else 'sys.argv = ["main.py"]'),
                    ]
                )
                + "\n\n"
                + sections[0]["code"].lstrip()
            )

        for section in sections:
            # Create a markdown cell for the section name and comments
            markdown_content = ""
            if section["name"]:
                markdown_content += f"## {section['name']}\n"
            if section["comments"]:
                markdown_content += f"{section['comments']}\n"
            if markdown_content:
                notebook.cells.append(nbformat.v4.new_markdown_cell(markdown_content))

            # Create a code cell for the section code and output
            if section["code"]:
                cell = nbformat.v4.new_code_cell(section["code"])
                if section["output"]:
                    # For simplicity, treat all output as coming from stdout
                    # TODO: support Jupyter kernel execution and handle outputs appropriately here
                    cell.outputs = [nbformat.v4.new_output("stream", name="stdout", text=section["output"])]
                notebook.cells.append(cell)

        # Save the notebook when requested, and always return it as a string
        if outfile:
            with open(outfile, "w", encoding="utf-8") as f:
                nbformat.write(notebook, f)
            logger.info(f"Notebook written to {outfile}")

        return nbformat.writes(notebook)


if __name__ == "__main__":
    # CLI entry point: convert a Python script (plus optional captured stdout)
    # into a Jupyter notebook.
    converter = NotebookConverter()
    parser = argparse.ArgumentParser(description="Convert Python code to Jupyter notebook.")
    parser.add_argument("inputfile", type=str, help="Path to the input Python file.")
    parser.add_argument("outfile", type=str, help="Path to the output Notebook file.")
    parser.add_argument(
        "--stdout",
        type=str,
        default="",
        help="Standard output from the code execution.",
    )
    parser.add_argument("--debug", action="store_true", help="Use debug flag to modify sys.argv.")
    args = parser.parse_args()

    # Use a context manager so the input file handle is closed promptly;
    # utf-8 matches the encoding used when the notebook is written.
    with open(args.inputfile, "r", encoding="utf-8") as f:
        source_code = f.read()

    converter.convert(
        task=None,
        code=source_code,
        stdout=args.stdout,
        outfile=args.outfile,
        # BUG FIX: was hard-coded to False, silently ignoring the parsed --debug flag.
        use_debug_flag=args.debug,
    )
22 changes: 22 additions & 0 deletions rdagent/components/coder/data_science/share/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,25 @@ docdev:
```
{% endfor %}

notebookconverter:
system: |-
{% include "scenarios.data_science.share:scen.role" %} Your task is to provide a summary for a data science solution.

You will be given:
- The original implementation plan for the script.
- A Python script that contains code and output.

Your task is to generate markdown content that includes a title and a short paragraph summarizing the techniques used in model training, the type of model produced, and any other noteworthy details of the solution.

The return content should follow the format below (please note that "````" is used to avoid conflicts with "```" in the markdown file):
````markdown
# <The title of the notebook>
<the content of markdown file>
````

user: |-
--------------- The implementation plan ---------------
{{plan}}

--------------- The Python script content ---------------
{{code}}
Loading