From 8f8c450b52c3f2f1bab8c9ac5f7250a41623820d Mon Sep 17 00:00:00 2001 From: Juan Colonese <47576248+colonesej@users.noreply.github.com> Date: Thu, 12 Mar 2020 14:56:30 -0300 Subject: [PATCH 01/10] BUG: Fix to_json when converting Period column Fix GH31917. AttributeError when trying to access frequency string directly on Series --- pandas/io/json/_table_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 4e42533ca2744..2c3ebe834cf1c 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -114,7 +114,7 @@ def convert_pandas_type_to_json_field(arr, dtype=None): field["constraints"] = {"enum": list(cats)} field["ordered"] = ordered elif is_period_dtype(arr): - field["freq"] = arr.freqstr + field["freq"] = arr.dtype.freq.freqstr elif is_datetime64tz_dtype(arr): if hasattr(arr, "dt"): field["tz"] = arr.dt.tz.zone From 527e2ef85b45b81ea58b5ac505d28e7f9ba93e63 Mon Sep 17 00:00:00 2001 From: Juan Colonese <47576248+colonesej@users.noreply.github.com> Date: Mon, 16 Mar 2020 08:37:53 -0300 Subject: [PATCH 02/10] Update v1.1.0.rst --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5b6f70be478c2..90c87c78b4379 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -306,7 +306,7 @@ I/O - Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`) - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`) - Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`) - +- Bug in :meth:`to_json` was raising `AttributeError` with column or Series of type `Period` (:issue:`31917`). Plotting ^^^^^^^^ From a7b2310d333db6a596e8092a6c92ee232c8029e7 Mon Sep 17 00:00:00 2001 From: Juan Colonese Date: Wed, 15 Apr 2020 18:25:03 -0300 Subject: [PATCH 03/10] first attempt rountrip tests for to_json on period series --- pandas/conftest.py | 7 +++++++ pandas/tests/io/json/test_pandas.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index e1088dae3925a..f2e99ccfcc856 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -472,6 +472,13 @@ def _create_series(index): for index_id, index in indices_dict.items() } +@pytest.fixture +def period_series(): + """Fixture for Series with Period-type index. + """ + s = tm.makePeriodSeries() + s.name = 'ps' + return s @pytest.fixture def series_with_simple_index(indices): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b74abc965f7fa..239c2a2eefb10 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -650,6 +650,21 @@ def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series): tm.assert_series_equal(result, expected) + def test_series_roundtrip_periodseries(self, orient, period_series): + # GH32665: Fix to_json when converting Period column/series + data = period_series.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient) + + expected = period_series + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient in ("index","columns"): + result.index = result.index.to_period() + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("dtype", [np.float64, np.int]) @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_numeric(self, orient, numpy, dtype): From 2e674a04e7c4d356178ed1dfe947682d57051d42 Mon Sep 17 00:00:00 2001 From: Juan Colonese <47576248+colonesej@users.noreply.github.com> Date: Wed, 15 Apr 2020 18:33:19 -0300 Subject: [PATCH 04/10] Update conftest.py --- pandas/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index f2e99ccfcc856..d4c3338ec6cb2 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -472,6 +472,7 @@ def _create_series(index): for index_id, index in indices_dict.items() } + @pytest.fixture def period_series(): """Fixture for Series with Period-type index. @@ -480,6 +481,7 @@ def period_series(): s.name = 'ps' return s + @pytest.fixture def series_with_simple_index(indices): """ From 67f0f4fe9816b1839faa5d52813d4a4ebe61a862 Mon Sep 17 00:00:00 2001 From: Juan Colonese <47576248+colonesej@users.noreply.github.com> Date: Wed, 15 Apr 2020 18:34:51 -0300 Subject: [PATCH 05/10] Update test_pandas.py --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c5c7e70855d69..7832ff82d0639 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -658,7 +658,7 @@ def test_series_roundtrip_periodseries(self, orient, period_series): expected = period_series if orient in ("values", "records"): expected = expected.reset_index(drop=True) - if orient in ("index","columns"): + if orient in ("index", "columns"): result.index = result.index.to_period() if orient != "split": expected.name = None From 25287b602f2fe596ebac044318c10f59038a3267 Mon Sep 17 00:00:00 2001 From: Juan Colonese <47576248+colonesej@users.noreply.github.com> Date: Thu, 16 Apr 2020 17:18:22 -0300 Subject: [PATCH 06/10] Update v1.1.0.rst --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5a5f1d25d7b7b..bc638611ad414 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -529,7 +529,7 @@ I/O - Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`) - Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. (:issue:`32207`) - Bug in :meth:`read_json` was raising ``TypeError`` when reading a list of booleans into a Series. (:issue:`31464`) -- Bug in :meth:`to_json` was raising `AttributeError` with column or Series of type `Period` (:issue:`31917`). +- Bug in :meth:`to_json` was raising ``AttributeError`` with column or Series of `PeriodDtype` (:issue:`31917`) Plotting ^^^^^^^^ From 38ea13bf718d63377bb4ede33d8492dec618b8f2 Mon Sep 17 00:00:00 2001 From: Juan Colonese Date: Fri, 17 Apr 2020 11:58:38 -0300 Subject: [PATCH 07/10] to_json roundtrip tests with frames with periodDtype index and column --- pandas/conftest.py | 29 ++++++++++++++++++++++++ pandas/tests/io/json/test_pandas.py | 35 +++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index d4c3338ec6cb2..e9327034b736b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -593,6 +593,35 @@ def datetime_frame(): return DataFrame(tm.getTimeSeriesData()) +@pytest.fixture +def period_frame(): + """ + Fixture for DataFrame of floats with PeriodIndex + + Columns are ['A', 'B', 'C', 'D'] + + A B C D + 2000-01-03 -1.122153 0.468535 0.122226 1.693711 + 2000-01-04 0.189378 0.486100 0.007864 -1.216052 + 2000-01-05 0.041401 -0.835752 -0.035279 -0.414357 + 2000-01-06 0.430050 0.894352 0.090719 0.036939 + 2000-01-07 -0.620982 -0.668211 -0.706153 1.466335 + 2000-01-10 -0.752633 0.328434 -0.815325 0.699674 + 2000-01-11 -2.236969 0.615737 -0.829076 -1.196106 + ... ... ... ... ... + 2000-02-03 1.642618 -0.579288 0.046005 1.385249 + 2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351 + 2000-02-07 -2.656149 -0.601387 1.410148 0.444150 + 2000-02-08 -1.201881 -1.289040 0.772992 -1.445300 + 2000-02-09 1.377373 0.398619 1.008453 -0.928207 + 2000-02-10 0.473194 -0.636677 0.984058 0.511519 + 2000-02-11 -0.965556 0.408313 -1.312844 -0.381948 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getPeriodData()) + + @pytest.fixture def float_frame(): """ diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7832ff82d0639..4a802b13742e5 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -365,6 +365,41 @@ def test_frame_to_json_except(self): with pytest.raises(ValueError, match=msg): df.to_json(orient="garbage") + + def test_frame_roundtrip_period_index(self, orient, period_frame): + # GH32665: Fix to_json when converting Period column/series + data = period_frame.to_json(orient=orient) + result = pd.read_json(data, typ="frame", orient=orient) + + expected = period_frame.copy() + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient == 'values': + # drop column names as well + expected = expected.T.reset_index(drop=True).T + if orient in ("index", "columns"): + result.index = result.index.to_period() + if orient != "split": + expected.name = None + + tm.assert_frame_equal(result, expected) + + + def test_frame_roundtrip_period_columns(self, orient, period_frame): + # GH32665: Fix to_json when converting Period column/series + test_frame = period_frame.reset_index() + data = test_frame.to_json(orient=orient) + result = pd.read_json(data, typ="frame", orient=orient) + + expected = test_frame + if orient == 'values': + expected.colummns = range(len(expected.columns)) + if orient != "split": + expected.name = None + + tm.assert_frame_equal(result, expected) + + def test_frame_empty(self): df = DataFrame(columns=["jim", "joe"]) assert not df._is_mixed_type From 68102cf6f780e439d2222c47383252b136a0cb37 Mon Sep 17 00:00:00 2001 From: Juan Colonese Date: Fri, 17 Apr 2020 12:18:05 -0300 Subject: [PATCH 08/10] explicitly giving frequency before comparing roundtrip tests on PeriodDtype indexes --- pandas/tests/io/json/test_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 4a802b13742e5..c44c5443d82e8 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -378,7 +378,7 @@ def test_frame_roundtrip_period_index(self, orient, period_frame): # drop column names as well expected = expected.T.reset_index(drop=True).T if orient in ("index", "columns"): - result.index = result.index.to_period() + result.index = result.index.to_period(freq=expected.index.freq) if orient != "split": expected.name = None @@ -694,7 +694,7 @@ def test_series_roundtrip_periodseries(self, orient, period_series): if orient in ("values", "records"): expected = expected.reset_index(drop=True) if orient in ("index", "columns"): - result.index = result.index.to_period() + result.index = result.index.to_period(freq=expected.index.freq) if orient != "split": expected.name = None From 09cf5f5e6fbbac3bf689cc1549b8758e328d3bc1 Mon Sep 17 00:00:00 2001 From: Juan Colonese Date: Wed, 10 Jun 2020 09:23:16 -0300 Subject: [PATCH 09/10] pep8 corrections --- pandas/tests/io/json/test_pandas.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index aebd64dedcb86..36c73e3ef443b 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -365,13 +365,11 @@ def test_frame_to_json_except(self): with pytest.raises(ValueError, match=msg): df.to_json(orient="garbage") - def test_frame_roundtrip_period_index(self, orient, period_frame): # GH32665: Fix to_json when converting Period column/series - if orient == 'split': pytest.skip('skipping orient=split due to different conversion schema') - + data = period_frame.to_json(orient=orient) result = pd.read_json(data, typ="frame", orient=orient) @@ -383,12 +381,11 @@ def test_frame_roundtrip_period_index(self, orient, period_frame): expected = expected.T.reset_index(drop=True).T if orient in ("index", "columns"): result.index = result.index.to_period(freq=expected.index.freq) - expected.name = None tm.assert_frame_equal(result, expected) - @pytest.mark.skip(reason = 'Conversion of Period-like column in dict-like format') + @pytest.mark.skip(reason='Conversion of Period-like column in dict-like format') def test_frame_roundtrip_period_columns(self, orient, period_frame): # GH32665: Fix to_json when converting Period column/series @@ -404,7 +401,6 @@ def test_frame_roundtrip_period_columns(self, orient, period_frame): tm.assert_frame_equal(result, expected) - def test_frame_empty(self): df = DataFrame(columns=["jim", "joe"]) assert not df._is_mixed_type @@ -694,7 +690,7 @@ def test_series_roundtrip_periodseries(self, orient, period_series): # GH32665: Fix to_json when converting Period column/series if orient == 'split': pytest.skip('skipping orient=split due to different conversion schema') - + data = period_series.to_json(orient=orient) result = pd.read_json(data, typ="series", orient=orient) From 223380d866cbd000600dbbf120ffeaa2d565cac8 Mon Sep 17 00:00:00 2001 From: Juan Colonese Date: Wed, 10 Jun 2020 10:20:45 -0300 Subject: [PATCH 10/10] black reformatting --- pandas/conftest.py | 2 +- pandas/tests/io/json/test_pandas.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 2dc08f6393785..229d2e3894b62 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -482,7 +482,7 @@ def period_series(): """Fixture for Series with Period-type index. """ s = tm.makePeriodSeries() - s.name = 'ps' + s.name = "ps" return s diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 36c73e3ef443b..9543829bd55bb 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -367,8 +367,8 @@ def test_frame_to_json_except(self): def test_frame_roundtrip_period_index(self, orient, period_frame): # GH32665: Fix to_json when converting Period column/series - if orient == 'split': - pytest.skip('skipping orient=split due to different conversion schema') + if orient == "split": + pytest.skip("skipping orient=split due to different conversion schema") data = period_frame.to_json(orient=orient) result = pd.read_json(data, typ="frame", orient=orient) @@ -376,7 +376,7 @@ def test_frame_roundtrip_period_index(self, orient, period_frame): expected = period_frame.copy() if orient in ("values", "records"): expected = expected.reset_index(drop=True) - if orient == 'values': + if orient == "values": # drop column names as well expected = expected.T.reset_index(drop=True).T if orient in ("index", "columns"): @@ -385,7 +385,7 @@ def test_frame_roundtrip_period_index(self, orient, period_frame): tm.assert_frame_equal(result, expected) - @pytest.mark.skip(reason='Conversion of Period-like column in dict-like format') + @pytest.mark.skip(reason="Conversion of Period-like column in dict-like format") def test_frame_roundtrip_period_columns(self, orient, period_frame): # GH32665: Fix to_json when converting Period column/series @@ -394,7 +394,7 @@ def test_frame_roundtrip_period_columns(self, orient, period_frame): result = pd.read_json(data, typ="frame", orient=orient) expected = test_frame - if orient == 'values': + if orient == "values": expected.colummns = range(len(expected.columns)) if orient != "split": expected.name = None @@ -688,8 +688,8 @@ def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series): def test_series_roundtrip_periodseries(self, orient, period_series): # GH32665: Fix to_json when converting Period column/series - if orient == 'split': - pytest.skip('skipping orient=split due to different conversion schema') + if orient == "split": + pytest.skip("skipping orient=split due to different conversion schema") data = period_series.to_json(orient=orient) result = pd.read_json(data, typ="series", orient=orient)