From fb623e79c145120100f522af0c75501db5a3f91b Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Fri, 1 Feb 2019 15:44:42 +0100 Subject: [PATCH 01/14] Convert query to working doctest --- pandas/core/frame.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 78c9f2aa96472..b9108c6c195bb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3076,9 +3076,23 @@ def query(self, expr, inplace=False, **kwargs): Examples -------- - >>> df = pd.DataFrame(np.random.randn(10, 2), columns=list('ab')) - >>> df.query('a > b') - >>> df[df.a > df.b] # same result as the previous expression + >>> df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + >>> df.query('A > B') + A B + 4 5 2 + + The previous expression is equivalent to + + >>> df[df.A > df.B] + A B + 4 5 2 """ inplace = validate_bool_kwarg(inplace, 'inplace') if not isinstance(expr, compat.string_types): From a05cc9cdee26949b044afb7921f419c794e6a1b6 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Fri, 1 Feb 2019 15:58:20 +0100 Subject: [PATCH 02/14] Convert frame.round to working doctest --- pandas/core/frame.py | 63 ++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b9108c6c195bb..c194f94e8ba2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6902,8 +6902,7 @@ def merge(self, right, how='inner', on=None, left_on=None, right_on=None, copy=copy, indicator=indicator, validate=validate) def round(self, decimals=0, *args, **kwargs): - """ - Round a DataFrame to a variable number of decimal places. + """Round a DataFrame to a variable number of decimal places. Parameters ---------- @@ -6928,29 +6927,47 @@ def round(self, decimals=0, *args, **kwargs): Examples -------- - >>> df = pd.DataFrame(np.random.random([3, 3]), - ... 
columns=['A', 'B', 'C'], index=['first', 'second', 'third']) + >>> df = pd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)], + ... columns=['dogs', 'cats']) >>> df - A B C - first 0.028208 0.992815 0.173891 - second 0.038683 0.645646 0.577595 - third 0.877076 0.149370 0.491027 - >>> df.round(2) - A B C - first 0.03 0.99 0.17 - second 0.04 0.65 0.58 - third 0.88 0.15 0.49 - >>> df.round({'A': 1, 'C': 2}) - A B C - first 0.0 0.992815 0.17 - second 0.0 0.645646 0.58 - third 0.9 0.149370 0.49 - >>> decimals = pd.Series([1, 0, 2], index=['A', 'B', 'C']) + dogs cats + 0 0.21 0.32 + 1 0.01 0.67 + 2 0.66 0.03 + 3 0.21 0.18 + + By providing an integer each column is rounded to the same number + of places + + >>> df.round(1) + dogs cats + 0 0.2 0.3 + 1 0.0 0.7 + 2 0.7 0.0 + 3 0.2 0.2 + + With a dict, the number of places for specific columns can be + specified with the column names as key and the number of places as + value + + >>> df.round({'dogs': 1, 'cats': 0}) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + + Using a Series, the number of places for specific columns can be + specified with the column names as index and the number of places as + value + + >>> decimals = pd.Series([0, 1], index=['cats', 'dogs']) >>> df.round(decimals) - A B C - first 0.0 1 0.17 - second 0.0 1 0.58 - third 0.9 0 0.49 + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 """ from pandas.core.reshape.concat import concat From 3153d6a5c59118fd0bdf3efdf033b53e892abc90 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Fri, 1 Feb 2019 16:10:00 +0100 Subject: [PATCH 03/14] Add parameter and return info --- pandas/core/frame.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c194f94e8ba2a..596c740f94e4f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6902,7 +6902,8 @@ def merge(self, right, how='inner', on=None, left_on=None, right_on=None, copy=copy, 
indicator=indicator, validate=validate) def round(self, decimals=0, *args, **kwargs): - """Round a DataFrame to a variable number of decimal places. + """ + Round a DataFrame to a variable number of decimal places. Parameters ---------- @@ -6915,10 +6916,18 @@ def round(self, decimals=0, *args, **kwargs): columns not included in `decimals` will be left as is. Elements of `decimals` which are not columns of the input will be ignored. + *args + Additional keywords have no effect but might be accepted for + compatibility with numpy. + **kwargs + Additional keywords have no effect but might be accepted for + compatibility with numpy. Returns ------- - DataFrame + DataFrame : + A DataFrame with the affected columns rounded to the specified + number of decimal places See Also -------- From 846e683bce8f1572432501a154833ea21fe612b7 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Fri, 1 Feb 2019 16:10:42 +0100 Subject: [PATCH 04/14] Specify decimal places --- pandas/core/frame.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 596c740f94e4f..3545e64473c2f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6946,7 +6946,7 @@ def round(self, decimals=0, *args, **kwargs): 3 0.21 0.18 By providing an integer each column is rounded to the same number - of places + of decimal places >>> df.round(1) dogs cats @@ -6956,8 +6956,8 @@ def round(self, decimals=0, *args, **kwargs): 3 0.2 0.2 With a dict, the number of places for specific columns can be - specified with the column names as key and the number of places as - value + specified with the column names as key and the number of decimal + places as value >>> df.round({'dogs': 1, 'cats': 0}) dogs cats @@ -6967,8 +6967,8 @@ def round(self, decimals=0, *args, **kwargs): 3 0.2 0.0 Using a Series, the number of places for specific columns can be - specified with the column names as index and the number of places as - value + specified 
with the column names as index and the number of + decimal places as value >>> decimals = pd.Series([0, 1], index=['cats', 'dogs']) >>> df.round(decimals) From 44c5c307c22c935f0ee73266b8e5a75e8a4c6ccb Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Fri, 1 Feb 2019 16:20:26 +0100 Subject: [PATCH 05/14] Improve docstring round method --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3545e64473c2f..bffcd4e8c7415 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6927,12 +6927,12 @@ def round(self, decimals=0, *args, **kwargs): ------- DataFrame : A DataFrame with the affected columns rounded to the specified - number of decimal places + number of decimal places. See Also -------- - numpy.around - Series.round + numpy.around : Round a numpy array to the given number of decimals. + Series.round : Round a Series to the given number of decimals. Examples -------- From 967455e9b6f76324581316a4a1b8d8997735156e Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Fri, 1 Feb 2019 16:29:48 +0100 Subject: [PATCH 06/14] Improve docstring of query method --- pandas/core/frame.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bffcd4e8c7415..548d0ccd8bc4a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3016,28 +3016,30 @@ def query(self, expr, inplace=False, **kwargs): Parameters ---------- - expr : string + expr : str The query string to evaluate. You can refer to variables in the environment by prefixing them with an '@' character like ``@a + b``. inplace : bool Whether the query should modify the data in place or return - a modified copy - - .. versionadded:: 0.18.0 - - kwargs : dict + a modified copy. + **kwargs See the documentation for :func:`pandas.eval` for complete details on the keyword arguments accepted by :meth:`DataFrame.query`. + .. 
versionadded:: 0.18.0 + Returns ------- - q : DataFrame + DataFrame + DataFrame resulting from the provided query expression. See Also -------- - pandas.eval - DataFrame.eval + eval : Evaluate a string describing operations on + DataFrame columns. + DataFrame.eval : Evaluate a string describing operations on + DataFrame columns. Notes ----- From 478c119af87a08112e412c86a40bd797ed7f2e17 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Fri, 1 Feb 2019 16:37:02 +0100 Subject: [PATCH 07/14] Fix pivot_table doctests --- pandas/core/frame.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 548d0ccd8bc4a..d016cfb0f4747 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5731,19 +5731,19 @@ def pivot(self, index=None, columns=None, values=None): This first example aggregates values by taking the sum. - >>> table = pivot_table(df, values='D', index=['A', 'B'], + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], ... columns=['C'], aggfunc=np.sum) >>> table C large small A B - bar one 4 5 - two 7 6 - foo one 4 1 - two NaN 6 + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 We can also fill missing values using the `fill_value` parameter. - >>> table = pivot_table(df, values='D', index=['A', 'B'], + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], ... columns=['C'], aggfunc=np.sum, fill_value=0) >>> table C large small @@ -5755,12 +5755,11 @@ def pivot(self, index=None, columns=None, values=None): The next example aggregates by taking the mean across multiple columns. - >>> table = pivot_table(df, values=['D', 'E'], index=['A', 'C'], + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], ... aggfunc={'D': np.mean, ... 
'E': np.mean}) >>> table - D E - mean mean + D E A C bar large 5.500000 7.500000 small 5.500000 8.500000 @@ -5770,17 +5769,17 @@ def pivot(self, index=None, columns=None, values=None): We can also calculate multiple types of aggregations for any given value column. - >>> table = pivot_table(df, values=['D', 'E'], index=['A', 'C'], + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], ... aggfunc={'D': np.mean, ... 'E': [min, max, np.mean]}) >>> table - D E - mean max mean min + D E + mean max mean min A C - bar large 5.500000 9 7.500000 6 - small 5.500000 9 8.500000 8 - foo large 2.000000 5 4.500000 4 - small 2.333333 6 4.333333 2 + bar large 5.500000 9.0 7.500000 6.0 + small 5.500000 9.0 8.500000 8.0 + foo large 2.000000 5.0 4.500000 4.0 + small 2.333333 6.0 4.333333 2.0 """ @Substitution('') From dd04260fc31bd435762032561354e2d1cf851986 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 2 Feb 2019 19:39:55 +0100 Subject: [PATCH 08/14] Fix docstring of ewm method --- pandas/core/window.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 5a9157b43ecd6..7674a0c49d7e6 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2116,8 +2116,8 @@ def _constructor(self): class EWM(_Rolling): - r""" - Provides exponential weighted functions. + """ + Provide exponential weighted functions. .. versionadded:: 0.18.0 @@ -2125,16 +2125,16 @@ class EWM(_Rolling): ---------- com : float, optional Specify decay in terms of center of mass, - :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0` + :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`. span : float, optional Specify decay in terms of span, - :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1` + :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`. 
halflife : float, optional Specify decay in terms of half-life, - :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0` + :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0`. alpha : float, optional Specify smoothing factor :math:`\alpha` directly, - :math:`0 < \alpha \leq 1` + :math:`0 < \alpha \leq 1`. .. versionadded:: 0.18.0 @@ -2143,14 +2143,18 @@ class EWM(_Rolling): (otherwise result is NA). adjust : bool, default True Divide by decaying adjustment factor in beginning periods to account - for imbalance in relative weightings (viewing EWMA as a moving average) + for imbalance in relative weightings (viewing EWMA as a moving average). ignore_na : bool, default False Ignore missing values when calculating weights; - specify True to reproduce pre-0.15.0 behavior + specify True to reproduce pre-0.15.0 behavior. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. The value 0 identifies the rows, and 1 + identifies the columns. Returns ------- - a Window sub-classed for the particular operation + DataFrame + A Window sub-classed for the particular operation. 
See Also -------- @@ -2188,6 +2192,7 @@ class EWM(_Rolling): -------- >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df B 0 0.0 1 1.0 From a8feb38473ab08db0c70d7e5f11a0a8516c22149 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 2 Feb 2019 19:58:42 +0100 Subject: [PATCH 09/14] Fix doctest combine --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d016cfb0f4747..27b1cf7d407a8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5224,8 +5224,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) - A B - 0 0 NaN + A B + 0 0 -5.0 1 0 3.0 Example that demonstrates the use of `overwrite` and behavior when From 71ae22ff6963a49ba7c3683b2241e503a07195c6 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 2 Feb 2019 20:02:57 +0100 Subject: [PATCH 10/14] Fix doctest DataFrame.axes --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 27b1cf7d407a8..815b6be0027d9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -483,7 +483,7 @@ def axes(self): -------- >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.axes - [RangeIndex(start=0, stop=2, step=1), Index(['coll', 'col2'], + [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], dtype='object')] """ return [self.index, self.columns] From 6182abd8459a567bb909e681f097e41f03f56f22 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 2 Feb 2019 20:09:31 +0100 Subject: [PATCH 11/14] Fix docstring issues Pandas.DataFrame.combine --- pandas/core/frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 815b6be0027d9..e7f285e3b4cd4 100644 --- 
a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5157,8 +5157,7 @@ def _combine_const(self, other, func): def combine(self, other, func, fill_value=None, overwrite=True): """ - Perform column-wise combine with another DataFrame based on a - passed function. + Perform column-wise combine with another DataFrame. Combines a DataFrame with `other` DataFrame using `func` to element-wise combine columns. The row and column indexes of the @@ -5174,13 +5173,14 @@ def combine(self, other, func, fill_value=None, overwrite=True): fill_value : scalar value, default None The value to fill NaNs with prior to passing any column to the merge func. - overwrite : boolean, default True + overwrite : bool, default True If True, columns in `self` that do not exist in `other` will be overwritten with NaNs. Returns ------- - result : DataFrame + DataFrame + Combination of the provided DataFrames. See Also -------- @@ -5232,7 +5232,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): the axis differ between the dataframes. >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]}) - >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1],}, index=[1, 2]) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2]) >>> df1.combine(df2, take_smaller) A B C 0 NaN NaN NaN @@ -5247,7 +5247,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): Demonstrating the preference of the passed in dataframe. 
- >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2]) >>> df2.combine(df1, take_smaller) A B C 0 0.0 NaN NaN From ac15ef24797cb3a248d559d077844c4d0819451a Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 2 Feb 2019 20:16:35 +0100 Subject: [PATCH 12/14] Add doctests to code check --- ci/code_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c8bfc564e7573..f71d2ce68f800 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -206,7 +206,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests frame.py' ; echo $MSG pytest -q --doctest-modules pandas/core/frame.py \ - -k"-axes -combine -itertuples -join -pivot_table -query -reindex -reindex_axis -round" + -k" -itertuples -join -reindex -reindex_axis -round" RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests series.py' ; echo $MSG From b5cecdd2035eed7a476c1efd967be0bd5de6b447 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 2 Feb 2019 20:16:50 +0100 Subject: [PATCH 13/14] Fix line length ewm --- pandas/core/window.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 7674a0c49d7e6..25a71886f06be 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2131,7 +2131,8 @@ class EWM(_Rolling): :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`. halflife : float, optional Specify decay in terms of half-life, - :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0`. + :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } + halflife > 0`. alpha : float, optional Specify smoothing factor :math:`\alpha` directly, :math:`0 < \alpha \leq 1`. @@ -2143,7 +2144,8 @@ class EWM(_Rolling): (otherwise result is NA). 
adjust : bool, default True Divide by decaying adjustment factor in beginning periods to account - for imbalance in relative weightings (viewing EWMA as a moving average). + for imbalance in relative weightings + (viewing EWMA as a moving average). ignore_na : bool, default False Ignore missing values when calculating weights; specify True to reproduce pre-0.15.0 behavior. From 2683b2201b944107a198832b1668e2775a476e1a Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sun, 3 Feb 2019 11:04:11 +0100 Subject: [PATCH 14/14] Fix docstring math rendering --- pandas/core/window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 25a71886f06be..5556a01307a7e 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2116,7 +2116,7 @@ def _constructor(self): class EWM(_Rolling): - """ + r""" Provide exponential weighted functions. .. versionadded:: 0.18.0