From 42134e7b266ca18b30385ddb6dff1bcef6f3e26e Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sat, 8 Oct 2011 23:53:46 +0100 Subject: [PATCH] Use stdlib csv module to write csv, so commas in data are escaped correctly. --- pandas/core/frame.py | 22 +++++++++------------- pandas/core/series.py | 5 +++-- pandas/tests/test_frame.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ec33dfc6441e5..1dc58738c0f9d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -13,6 +13,7 @@ # pylint: disable=W0212,W0231,W0703,W0622 from StringIO import StringIO +import csv import operator import warnings @@ -485,13 +486,13 @@ def to_csv(self, path, nanRep='', cols=None, header=True, mode : Python write mode, default 'wb' """ f = open(path, mode) + csvout = csv.writer(f) if cols is None: cols = self.columns series = self._series if header: - joined_cols = ','.join([str(c) for c in cols]) if index: # should write something for index label if index_label is None: @@ -509,31 +510,26 @@ def to_csv(self, path, nanRep='', cols=None, header=True, elif not isinstance(index_label, (list, tuple, np.ndarray)): # given a string for a DF with Index index_label = [index_label] - f.write('%s,%s' % (",".join(index_label), joined_cols)) + csvout.writerow(list(index_label) + list(cols)) else: - f.write(joined_cols) - f.write('\n') + csvout.writerow(cols) nlevels = getattr(self.index, 'nlevels', 1) for idx in self.index: + row_fields = [] if index: if nlevels == 1: - f.write(str(idx)) + row_fields = [idx] else: # handle MultiIndex - f.write(",".join([str(i) for i in idx])) + row_fields = list(idx) for i, col in enumerate(cols): val = series[col].get(idx) if isnull(val): val = nanRep - else: - val = str(val) - if i > 0 or index: - f.write(',%s' % val) - else: - f.write('%s' % val) + row_fields.append(val) - f.write('\n') + csvout.writerow(row_fields) f.close() diff --git a/pandas/core/series.py b/pandas/core/series.py index 30bd68ab832c7..74e2fdc64437f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,6 +5,7 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 +import csv import itertools import operator import sys @@ -1589,8 +1590,8 @@ def to_csv(self, path): Output filepath. If None, write to stdout """ f = open(path, 'wb') - for idx, value in self.iteritems(): - f.write(str(idx) + ',' + str(value) + '\n') + csvout = csv.writer(f) + csvout.writerows(self.iteritems()) f.close() def dropna(self): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 119cf2968efa2..60feace3ecb6d 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1582,6 +1582,16 @@ def test_to_csv_float32_nanrep(self): lines = open(pth).readlines() self.assert_(lines[1].split(',')[2] == '999') os.remove(pth) + + def test_to_csv_withcommas(self): + "Commas inside fields should be correctly escaped when saving as CSV." + path = '__tmp__' + df = DataFrame({'A':[1,2,3], 'B':['5,6','7,8','9,0']}) + df.to_csv(path) + df2 = DataFrame.from_csv(path) + assert_frame_equal(df2, df) + + os.remove(path) def test_info(self): io = StringIO()