pandas-dev · canthonyscott · Jun 23, 2023 · Jun 23, 2023 · Jun 23, 2023 · Jun 23, 2023
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3684,6 +3684,7 @@ def to_csv(
         decimal: str = ...,
         errors: OpenFileErrors = ...,
         storage_options: StorageOptions = ...,
+        comment: str | None = ...,
     ) -> str:
         ...
 
@@ -3711,6 +3712,7 @@ def to_csv(
         decimal: str = ...,
         errors: OpenFileErrors = ...,
         storage_options: StorageOptions = ...,
+        comment: str | None = ...,
     ) -> None:
         ...
 
@@ -3742,6 +3744,7 @@ def to_csv(
         decimal: str = ".",
         errors: OpenFileErrors = "strict",
         storage_options: StorageOptions | None = None,
+        comment: str | None = None,
     ) -> str | None:
         r"""
         Write object to a comma-separated values (csv) file.
@@ -3847,6 +3850,13 @@ def to_csv(
 
             .. versionadded:: 1.2.0
 
+        comment : str, default None
+            If set, the keys and values of ``df.attrs`` are written to the
+            beginning of the csv file, prefixed by this value, one key/value
+            pair per line. To prevent downstream reading issues, the field
+            delimiter is removed from the written keys and values.
+            Complement of the ``comment`` parameter of ``pd.read_csv``.
+
         Returns
         -------
         None or str
@@ -3913,6 +3923,7 @@ def to_csv(
             doublequote=doublequote,
             escapechar=escapechar,
             storage_options=storage_options,
+            comment=comment,
         )
 
     # ----------------------------------------------------------------------

diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -72,6 +72,7 @@ def __init__(
         doublequote: bool = True,
         escapechar: str | None = None,
         storage_options: StorageOptions | None = None,
+        comment: str | None = None,
     ) -> None:
         self.fmt = formatter
 
@@ -94,6 +95,7 @@ def __init__(
         self.date_format = date_format
         self.cols = self._initialize_columns(cols)
         self.chunksize = self._initialize_chunksize(chunksize)
+        self.comment = comment
 
     @property
     def na_rep(self) -> str:
@@ -265,6 +267,8 @@ def save(self) -> None:
             self._save()
 
     def _save(self) -> None:
+        if self.comment:
+            self._save_df_attrs()
         if self._need_to_save_header:
             self._save_header()
         self._save_body()
@@ -323,3 +327,10 @@ def _save_chunk(self, start_i: int, end_i: int) -> None:
             self.cols,
             self.writer,
         )
+
+    def _save_df_attrs(self) -> None:
+        """Write ``frame.attrs`` as comment lines, minus delimiters/newlines."""
+        strip = str.maketrans("", "", self.writer.dialect.delimiter + "\r\n")
+        for key, value in self.fmt.frame.attrs.items():
+            key, value = str(key).translate(strip), str(value).translate(strip)
+            self.writer.writerow([f"{self.comment}{key}:{value}"])
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1118,6 +1118,7 @@ def to_csv(
         escapechar: str | None = None,
         errors: str = "strict",
         storage_options: StorageOptions | None = None,
+        comment: str | None = None,
     ) -> str | None:
         """
         Render dataframe as comma-separated file.
@@ -1148,6 +1149,7 @@ def to_csv(
             escapechar=escapechar,
             storage_options=storage_options,
             formatter=self.fmt,
+            comment=comment,
         )
         csv_formatter.save()
 

diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
@@ -1,10 +1,13 @@
+from io import StringIO
+
 import numpy as np
 import pytest
 
 from pandas import (
     DataFrame,
     NaT,
     date_range,
+    read_csv,
 )
 import pandas._testing as tm
 
@@ -259,3 +262,26 @@ def frame_of_index_cols():
         }
     )
     return df
+
+
+@pytest.fixture
+def comments_attrs():  # sample ``attrs`` mapping for the to_csv comment tests
+    return {
+        "one": "Hello",
+        "two": "Hello World",
+        "three": "Hello, World!",  # comma inside a value
+        "four,": "comma in keym",  # comma inside a key
+    }
+
+
+@pytest.fixture
+def data_for_comments_raw():
+    """Comma-separated text: one header row followed by three data rows."""
+    return "col1,col2,col3\n0,0,0\n1,1,1\n2,2,2\n"
+
+
+@pytest.fixture
+def frame_for_comments(data_for_comments_raw, comments_attrs):
+    df = read_csv(StringIO(data_for_comments_raw))  # parse the raw csv text
+    df.attrs = comments_attrs  # metadata the comment tests expect in the output
+    return df
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
@@ -1310,3 +1310,65 @@ def test_to_csv_categorical_and_interval(self):
         expected_rows = [",a", '0,"[2020-01-01, 2020-01-02]"']
         expected = tm.convert_rows_list_to_csv_str(expected_rows)
         assert result == expected
+
+    def prepate_string_rep_of_comment_output(
+        self, delim: str, comments_attrs, data_for_comments_raw, frame_for_comments
+    ) -> str:
+        """
+        Build the csv text expected from ``to_csv(comment="#", sep=delim)``.
+
+        One ``#key:value`` line per attr, with ``delim`` stripped from key and
+        value, followed by the raw data with its commas replaced by ``delim``.
+        ``frame_for_comments`` is accepted but not used.
+        """
+        prefix = "#"
+        header_lines = [
+            f"{prefix}{key.replace(delim, '')}:{val.replace(delim, '')}\n"
+            for key, val in comments_attrs.items()
+        ]
+        return "".join(header_lines) + data_for_comments_raw.replace(",", delim)
+
+    def test_comment_writer_csv(
+        self, comments_attrs, data_for_comments_raw, frame_for_comments
+    ):
+        """Check ``to_csv(comment="#")`` output for the default comma separator."""
+        comment = "#"
+        delim = ","
+        output_data = self.prepate_string_rep_of_comment_output(
+            delim, comments_attrs, data_for_comments_raw, frame_for_comments
+        )
+        read_output = read_csv(StringIO(output_data), comment=comment)
+
+        # Check output data can be read correctly (assert_frame_equal raises
+        # with its own diff message on mismatch).
+        tm.assert_frame_equal(read_output, frame_for_comments)
+
+        # Check saved output is as expected
+        with tm.ensure_clean() as path:
+            frame_for_comments.to_csv(path, comment=comment, index=False)
+            with open(path, encoding="utf-8") as fp:
+                lines = fp.read()
+
+        # The file content is compared once the handle has been closed.
+        assert lines == output_data, "csv output with comment lines not as expected"
+
+    def test_comment_writer_tabs(
+        self, comments_attrs, data_for_comments_raw, frame_for_comments
+    ):
+        """Check ``to_csv(comment="#", sep="\\t")`` output (tab separator)."""
+        comment = "#"
+        delim = "\t"
+        output_data = self.prepate_string_rep_of_comment_output(
+            delim, comments_attrs, data_for_comments_raw, frame_for_comments
+        )
+        read_output = read_csv(StringIO(output_data), comment=comment, sep=delim)
+
+        # Check output data can be read correctly (assert_frame_equal raises
+        # with its own diff message on mismatch).
+        tm.assert_frame_equal(read_output, frame_for_comments)
+
+        with tm.ensure_clean() as path:
+            frame_for_comments.to_csv(path, comment=comment, index=False, sep=delim)
+            with open(path, encoding="utf-8") as fp:
+                lines = fp.read()
+        assert lines == output_data, "tsv output with comment lines not as expected"