Merge pull request #7733 from sinhrks/tsplot_bug

jreback · jreback · commit bfa9bc5583ed · 2014-07-24T08:59:45.000-04:00
BUG: Repeated timeseries plot may result in incorrect kind
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -245,12 +245,18 @@ Bug Fixes
 - Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`)
 
 
+
 - Bug in pickles contains ``DateOffset`` may raise ``AttributeError`` when ``normalize`` attribute is reffered internally (:issue:`7748`)
 
 - Bug in pickle deserialization that failed for pre-0.14.1 containers with dup items trying to avoid ambiguity
   when matching block and manager items, when there's only one block there's no ambiguity (:issue:`7794`)
 
 
+
+- Bug in repeated timeseries line and area plot may result in ``ValueError`` or incorrect kind (:issue:`7733`)
+
+
+
 - Bug in ``is_superperiod`` and ``is_subperiod`` cannot handle higher frequencies than ``S`` (:issue:`7760`, :issue:`7772`, :issue:`7803`)
 
 - Bug in ``DataFrame.reset_index`` which has ``MultiIndex`` contains ``PeriodIndex`` or ``DatetimeIndex`` with tz raises ``ValueError`` (:issue:`7746`, :issue:`7793`)
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
@@ -1564,10 +1564,8 @@ def _make_plot(self):
 
             label = com.pprint_thing(label)  # .encode('utf-8')
             kwds['label'] = label
-            y_values = self._get_stacked_values(y, label)
 
-            newlines = plotf(ax, x, y_values, style=style, **kwds)
-            self._update_prior(y)
+            newlines = plotf(ax, x, y, style=style, column_num=i, **kwds)
             self._add_legend_handle(newlines[0], label, index=i)
 
             lines = _get_all_lines(ax)
@@ -1586,6 +1584,18 @@ def _get_stacked_values(self, y, label):
         else:
             return y
 
+    def _get_plot_function(self):
+        f = MPLPlot._get_plot_function(self)
+        def plotf(ax, x, y, style=None, column_num=None, **kwds):
+            # column_num is used to get the target column from plotf in line and area plots
+            if column_num == 0:
+                self._initialize_prior(len(self.data))
+            y_values = self._get_stacked_values(y, kwds['label'])
+            lines = f(ax, x, y_values, style=style, **kwds)
+            self._update_prior(y)
+            return lines
+        return plotf
+
     def _get_ts_plot_function(self):
         from pandas.tseries.plotting import tsplot
         plotf = self._get_plot_function()
@@ -1678,11 +1688,13 @@ def _get_plot_function(self):
             raise ValueError("Log-y scales are not supported in area plot")
         else:
             f = MPLPlot._get_plot_function(self)
-            def plotf(ax, x, y, style=None, **kwds):
-                lines = f(ax, x, y, style=style, **kwds)
+            def plotf(ax, x, y, style=None, column_num=0, **kwds):
+                if column_num == 0:
+                    self._initialize_prior(len(self.data))
+                y_values = self._get_stacked_values(y, kwds['label'])
+                lines = f(ax, x, y_values, style=style, **kwds)
 
-                # get data from the line
-                # insert fill_between starting point
+                # get data from the line to get coordinates for fill_between
                 xdata, y_values = lines[0].get_data(orig=False)
 
                 if (y >= 0).all():
@@ -1696,6 +1708,7 @@ def plotf(ax, x, y, style=None, **kwds):
                     kwds['color'] = lines[0].get_color()
 
                 self.plt.Axes.fill_between(ax, xdata, start, y_values, **kwds)
+                self._update_prior(y)
                 return lines
 
         return plotf
diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py
@@ -60,8 +60,7 @@ def tsplot(series, plotf, **kwargs):
     # how to make sure ax.clear() flows through?
     if not hasattr(ax, '_plot_data'):
         ax._plot_data = []
-    ax._plot_data.append((series, kwargs))
-
+    ax._plot_data.append((series, plotf, kwargs))
     lines = plotf(ax, series.index, series.values, **kwargs)
 
     # set date formatter, locators and rescale limits
@@ -118,7 +117,7 @@ def _is_sup(f1, f2):
 
 def _upsample_others(ax, freq, plotf, kwargs):
     legend = ax.get_legend()
-    lines, labels = _replot_ax(ax, freq, plotf, kwargs)
+    lines, labels = _replot_ax(ax, freq, kwargs)
 
     other_ax = None
     if hasattr(ax, 'left_ax'):
@@ -127,7 +126,7 @@ def _upsample_others(ax, freq, plotf, kwargs):
         other_ax = ax.right_ax
 
     if other_ax is not None:
-        rlines, rlabels = _replot_ax(other_ax, freq, plotf, kwargs)
+        rlines, rlabels = _replot_ax(other_ax, freq, kwargs)
         lines.extend(rlines)
         labels.extend(rlabels)
 
@@ -139,7 +138,7 @@ def _upsample_others(ax, freq, plotf, kwargs):
         ax.legend(lines, labels, loc='best', title=title)
 
 
-def _replot_ax(ax, freq, plotf, kwargs):
+def _replot_ax(ax, freq, kwargs):
     data = getattr(ax, '_plot_data', None)
     ax._plot_data = []
     ax.clear()
@@ -148,7 +147,7 @@ def _replot_ax(ax, freq, plotf, kwargs):
     lines = []
     labels = []
     if data is not None:
-        for series, kwds in data:
+        for series, plotf, kwds in data:
             series = series.copy()
             idx = series.index.asfreq(freq, how='S')
             series.index = idx
diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py
@@ -704,9 +704,81 @@ def test_from_weekly_resampling(self):
         low = Series(np.random.randn(len(idxl)), idxl)
         low.plot()
         ax = high.plot()
+
+        expected_h = idxh.to_period().asi8
+        expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549,
+                               1553, 1558, 1562])
         for l in ax.get_lines():
             self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
 
+            xdata = l.get_xdata(orig=False)
+            if len(xdata) == 12: # idxl lines
+                self.assert_numpy_array_equal(xdata, expected_l)
+            else:
+                self.assert_numpy_array_equal(xdata, expected_h)
+
+    @slow
+    def test_from_resampling_area_line_mixed(self):
+        idxh = date_range('1/1/1999', periods=52, freq='W')
+        idxl = date_range('1/1/1999', periods=12, freq='M')
+        high = DataFrame(np.random.rand(len(idxh), 3),
+                         index=idxh, columns=[0, 1, 2])
+        low = DataFrame(np.random.rand(len(idxl), 3),
+                     index=idxl, columns=[0, 1, 2])
+
+        # low to high
+        for kind1, kind2 in [('line', 'area'), ('area', 'line')]:
+            ax = low.plot(kind=kind1, stacked=True)
+            ax = high.plot(kind=kind2, stacked=True, ax=ax)
+
+            # check low dataframe result
+            expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549,
+                                   1553, 1558, 1562])
+            expected_y = np.zeros(len(expected_x))
+            for i in range(3):
+                l = ax.lines[i]
+                self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
+                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
+                # check stacked values are correct
+                expected_y += low[i].values
+                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)
+
+            # check high dataframe result
+            expected_x = idxh.to_period().asi8
+            expected_y = np.zeros(len(expected_x))
+            for i in range(3):
+                l = ax.lines[3 + i]
+                self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
+                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
+                expected_y += high[i].values
+                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)
+
+        # high to low
+        for kind1, kind2 in [('line', 'area'), ('area', 'line')]:
+            ax = high.plot(kind=kind1, stacked=True)
+            ax = low.plot(kind=kind2, stacked=True, ax=ax)
+
+            # check high dataframe result
+            expected_x = idxh.to_period().asi8
+            expected_y = np.zeros(len(expected_x))
+            for i in range(3):
+                l = ax.lines[i]
+                self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
+                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
+                expected_y += high[i].values
+                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)
+
+            # check low dataframe result
+            expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549,
+                                   1553, 1558, 1562])
+            expected_y = np.zeros(len(expected_x))
+            for i in range(3):
+                l = ax.lines[3 + i]
+                self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
+                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
+                expected_y += low[i].values
+                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)
+
     @slow
     def test_mixed_freq_second_millisecond(self):
         # GH 7772, GH 7760