diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index e0e9048b20a9d..d3a13ad5485ad 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3684,6 +3684,7 @@ def to_csv(
         decimal: str = ...,
         errors: OpenFileErrors = ...,
         storage_options: StorageOptions = ...,
+        comment: str | None = ...,
     ) -> str:
         ...
 
@@ -3711,6 +3712,7 @@ def to_csv(
         decimal: str = ...,
         errors: OpenFileErrors = ...,
         storage_options: StorageOptions = ...,
+        comment: str | None = ...,
     ) -> None:
         ...
 
@@ -3742,6 +3744,7 @@ def to_csv(
         decimal: str = ".",
         errors: OpenFileErrors = "strict",
         storage_options: StorageOptions | None = None,
+        comment: str | None = None,
     ) -> str | None:
         r"""
         Write object to a comma-separated values (csv) file.
@@ -3847,6 +3850,13 @@ def to_csv(
 
             .. versionadded:: 1.2.0
 
+        comment : str, default None
+            If set, each key/value pair of ``df.attrs`` is written on its own
+            line at the beginning of the csv file, prefixed by this value.
+            To keep every pair on a single unquoted line, the delimiter, quote
+            character and line breaks are removed from the keys and values.
+            Complement of the ``comment`` parameter of :func:`read_csv`.
+
         Returns
         -------
         None or str
@@ -3913,6 +3923,7 @@ def to_csv(
             doublequote=doublequote,
             escapechar=escapechar,
             storage_options=storage_options,
+            comment=comment,
         )
 
     # ----------------------------------------------------------------------
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
index 39abb0bf127d9..2b315b36b2aba 100644
--- a/pandas/io/formats/csvs.py
+++ b/pandas/io/formats/csvs.py
@@ -72,6 +72,7 @@ def __init__(
         doublequote: bool = True,
         escapechar: str | None = None,
         storage_options: StorageOptions | None = None,
+        comment: str | None = None,
     ) -> None:
         self.fmt = formatter
 
@@ -94,6 +95,7 @@ def __init__(
         self.date_format = date_format
         self.cols = self._initialize_columns(cols)
         self.chunksize = self._initialize_chunksize(chunksize)
+        self.comment = comment
 
     @property
     def na_rep(self) -> str:
@@ -265,6 +267,8 @@ def save(self) -> None:
             self._save()
 
     def _save(self) -> None:
+        if self.comment:
+            self._save_df_attrs()
         if self._need_to_save_header:
             self._save_header()
         self._save_body()
@@ -323,3 +327,21 @@ def _save_chunk(self, start_i: int, end_i: int) -> None:
             self.cols,
             self.writer,
         )
+
+    def _save_df_attrs(self) -> None:
+        """
+        Write each item of the frame's ``attrs`` as a ``comment`` prefixed row.
+        """
+        dialect = self.writer.dialect
+        # Characters that make the csv writer quote a field (delimiter, quotechar,
+        # line terminator) or that split it over several lines. A quoted or
+        # split row no longer starts with the comment prefix, so they are
+        # removed from keys and values.
+        # NOTE: quoting=QUOTE_ALL/QUOTE_NONNUMERIC still quotes these rows.
+        unsafe = f"{dialect.delimiter}{dialect.quotechar or ''}"
+        unsafe += f"{dialect.lineterminator}\r\n"
+        drop_unsafe = str.maketrans("", "", unsafe)
+        for key, value in self.fmt.frame.attrs.items():
+            key = str(key).translate(drop_unsafe)
+            value = str(value).translate(drop_unsafe)
+            self.writer.writerow([f"{self.comment}{key}:{value}"])
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index a7a6f481ebdde..371c11866fc50 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1118,6 +1118,7 @@ def to_csv(
         escapechar: str | None = None,
         errors: str = "strict",
         storage_options: StorageOptions | None = None,
+        comment: str | None = None,
     ) -> str | None:
         """
         Render dataframe as comma-separated file.
@@ -1148,6 +1149,7 @@ def to_csv(
             escapechar=escapechar,
             storage_options=storage_options,
             formatter=self.fmt,
+            comment=comment,
         )
         csv_formatter.save()
 
diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
index 97cf75acbd629..054ea842b3e6f 100644
--- a/pandas/tests/frame/conftest.py
+++ b/pandas/tests/frame/conftest.py
@@ -1,3 +1,5 @@
+from io import StringIO
+
 import numpy as np
 import pytest
 
@@ -5,6 +7,7 @@
     DataFrame,
     NaT,
     date_range,
+    read_csv,
 )
 import pandas._testing as tm
 
@@ -259,3 +262,28 @@ def frame_of_index_cols():
         }
     )
     return df
+
+
+@pytest.fixture
+def comments_attrs():
+    """Attrs whose keys/values include the csv delimiter, to exercise stripping."""
+    return {
+        "one": "Hello",
+        "two": "Hello World",
+        "three": "Hello, World!",
+        "four,": "comma in key",
+    }
+
+
+@pytest.fixture
+def data_for_comments_raw():
+    """Raw csv text of the frame used by the comment-writing tests."""
+    return "col1,col2,col3\n0,0,0\n1,1,1\n2,2,2\n"
+
+
+@pytest.fixture
+def frame_for_comments(data_for_comments_raw, comments_attrs):
+    """Frame parsed from ``data_for_comments_raw`` carrying ``comments_attrs``."""
+    df = read_csv(StringIO(data_for_comments_raw))
+    df.attrs = comments_attrs
+    return df
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index ee9c4f05991a0..ed886ff9c0770 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -1310,3 +1310,35 @@ def test_to_csv_categorical_and_interval(self):
         expected_rows = [",a", '0,"[2020-01-01, 2020-01-02]"']
         expected = tm.convert_rows_list_to_csv_str(expected_rows)
         assert result == expected
+
+    def prepare_string_rep_of_comment_output(
+        self, delim: str, comments_attrs, data_for_comments_raw
+    ) -> str:
+        # Expected file content: one "#key:value" line per attr, with the
+        # delimiter stripped from key and value, followed by the data rows.
+        comment = "#"
+        comment_lines = [
+            f"{comment}{k.replace(delim, '')}:{v.replace(delim, '')}\n"
+            for k, v in comments_attrs.items()
+        ]
+        return "".join(comment_lines) + data_for_comments_raw.replace(",", delim)
+
+    @pytest.mark.parametrize("delim", [",", "\t"])
+    def test_comment_writer(
+        self, delim, comments_attrs, data_for_comments_raw, frame_for_comments
+    ):
+        comment = "#"
+        output_data = self.prepare_string_rep_of_comment_output(
+            delim, comments_attrs, data_for_comments_raw
+        )
+
+        # Check the expected output can be read back correctly
+        read_output = read_csv(StringIO(output_data), comment=comment, sep=delim)
+        tm.assert_frame_equal(read_output, frame_for_comments)
+
+        # Check the saved output is as expected
+        with tm.ensure_clean() as path:
+            frame_for_comments.to_csv(path, comment=comment, index=False, sep=delim)
+            with open(path, encoding="utf-8") as fp:
+                lines = fp.read()
+        assert lines == output_data, "output with comment lines not as expected"