Skip to content

Commit 45f60fe

Browse files
committed
Merge branch 'master' into rank_desc
2 parents f6d0fd0 + 3783ccc commit 45f60fe

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

69 files changed

+6574
-1767
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,4 @@ doc/tmp.sv
109109
doc/source/styled.xlsx
110110
doc/source/templates/
111111
env/
112+
doc/source/savefig/

asv_bench/benchmarks/groupby.py

+58-79
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
method_blacklist = {
1515
'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
1616
'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
17-
'var', 'mad', 'describe', 'std'}
17+
'var', 'mad', 'describe', 'std'},
18+
'datetime': {'median', 'prod', 'sem', 'cumsum', 'sum', 'mean', 'skew',
19+
'cumprod', 'cummax', 'pct_change', 'var', 'mad', 'describe',
20+
'std'}
1821
}
1922

2023

@@ -90,45 +93,6 @@ def time_series_groups(self, data, key):
9093
self.ser.groupby(self.ser).groups
9194

9295

93-
class FirstLast(object):
94-
95-
goal_time = 0.2
96-
97-
param_names = ['dtype']
98-
params = ['float32', 'float64', 'datetime', 'object']
99-
100-
def setup(self, dtype):
101-
N = 10**5
102-
# with datetimes (GH7555)
103-
if dtype == 'datetime':
104-
self.df = DataFrame({'values': date_range('1/1/2011',
105-
periods=N,
106-
freq='s'),
107-
'key': range(N)})
108-
elif dtype == 'object':
109-
self.df = DataFrame({'values': ['foo'] * N,
110-
'key': range(N)})
111-
else:
112-
labels = np.arange(N / 10).repeat(10)
113-
data = Series(np.random.randn(len(labels)), dtype=dtype)
114-
data[::3] = np.nan
115-
data[1::3] = np.nan
116-
labels = labels.take(np.random.permutation(len(labels)))
117-
self.df = DataFrame({'values': data, 'key': labels})
118-
119-
def time_groupby_first(self, dtype):
120-
self.df.groupby('key').first()
121-
122-
def time_groupby_last(self, dtype):
123-
self.df.groupby('key').last()
124-
125-
def time_groupby_nth_all(self, dtype):
126-
self.df.groupby('key').nth(0, dropna='all')
127-
128-
def time_groupby_nth_none(self, dtype):
129-
self.df.groupby('key').nth(0)
130-
131-
13296
class GroupManyLabels(object):
13397

13498
goal_time = 0.2
@@ -149,39 +113,40 @@ class Nth(object):
149113

150114
goal_time = 0.2
151115

152-
def setup_cache(self):
153-
df = DataFrame(np.random.randint(1, 100, (10000, 2)))
154-
df.iloc[1, 1] = np.nan
155-
return df
156-
157-
def time_frame_nth_any(self, df):
158-
df.groupby(0).nth(0, dropna='any')
159-
160-
def time_frame_nth(self, df):
161-
df.groupby(0).nth(0)
162-
116+
param_names = ['dtype']
117+
params = ['float32', 'float64', 'datetime', 'object']
163118

164-
def time_series_nth_any(self, df):
165-
df[1].groupby(df[0]).nth(0, dropna='any')
119+
def setup(self, dtype):
120+
N = 10**5
121+
# with datetimes (GH7555)
122+
if dtype == 'datetime':
123+
values = date_range('1/1/2011', periods=N, freq='s')
124+
elif dtype == 'object':
125+
values = ['foo'] * N
126+
else:
127+
values = np.arange(N).astype(dtype)
166128

167-
def time_series_nth(self, df):
168-
df[1].groupby(df[0]).nth(0)
129+
key = np.arange(N)
130+
self.df = DataFrame({'key': key, 'values': values})
131+
self.df.iloc[1, 1] = np.nan # insert missing data
169132

133+
def time_frame_nth_any(self, dtype):
134+
self.df.groupby('key').nth(0, dropna='any')
170135

171-
class NthObject(object):
136+
def time_groupby_nth_all(self, dtype):
137+
self.df.groupby('key').nth(0, dropna='all')
172138

173-
goal_time = 0.2
139+
def time_frame_nth(self, dtype):
140+
self.df.groupby('key').nth(0)
174141

175-
def setup_cache(self):
176-
df = DataFrame(np.random.randint(1, 100, (10000,)), columns=['g'])
177-
df['obj'] = ['a'] * 5000 + ['b'] * 5000
178-
return df
142+
def time_series_nth_any(self, dtype):
143+
self.df['values'].groupby(self.df['key']).nth(0, dropna='any')
179144

180-
def time_nth(self, df):
181-
df.groupby('g').nth(5)
145+
def time_groupby_nth_all(self, dtype):
146+
self.df['values'].groupby(self.df['key']).nth(0, dropna='all')
182147

183-
def time_nth_last(self, df):
184-
df.groupby('g').last()
148+
def time_series_nth(self, dtype):
149+
self.df['values'].groupby(self.df['key']).nth(0)
185150

186151

187152
class DateAttributes(object):
@@ -243,7 +208,7 @@ def time_multi_count(self, df):
243208
df.groupby(['key1', 'key2']).count()
244209

245210

246-
class CountInt(object):
211+
class CountMultiInt(object):
247212

248213
goal_time = 0.2
249214

@@ -255,18 +220,18 @@ def setup_cache(self):
255220
'ints2': np.random.randint(0, 1000, size=n)})
256221
return df
257222

258-
def time_int_count(self, df):
223+
def time_multi_int_count(self, df):
259224
df.groupby(['key1', 'key2']).count()
260225

261-
def time_int_nunique(self, df):
226+
def time_multi_int_nunique(self, df):
262227
df.groupby(['key1', 'key2']).nunique()
263228

264229

265230
class AggFunctions(object):
266231

267232
goal_time = 0.2
268233

269-
def setup_cache(self):
234+
def setup_cache():
270235
N = 10**5
271236
fac1 = np.array(['A', 'B', 'C'], dtype='O')
272237
fac2 = np.array(['one', 'two'], dtype='O')
@@ -361,9 +326,6 @@ def setup(self):
361326
def time_multi_size(self):
362327
self.df.groupby(['key1', 'key2']).size()
363328

364-
def time_dt_size(self):
365-
self.df.groupby(['dates']).size()
366-
367329
def time_dt_timegrouper_size(self):
368330
with warnings.catch_warnings(record=True):
369331
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
@@ -376,15 +338,16 @@ class GroupByMethods(object):
376338

377339
goal_time = 0.2
378340

379-
param_names = ['dtype', 'method']
380-
params = [['int', 'float', 'object'],
341+
param_names = ['dtype', 'method', 'application']
342+
params = [['int', 'float', 'object', 'datetime'],
381343
['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
382344
'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
383345
'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
384346
'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
385-
'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]
347+
'std', 'sum', 'tail', 'unique', 'value_counts', 'var'],
348+
['direct', 'transformation']]
386349

387-
def setup(self, dtype, method):
350+
def setup(self, dtype, method, application):
388351
if method in method_blacklist.get(dtype, {}):
389352
raise NotImplementedError # skip benchmark
390353
ngroups = 1000
@@ -398,12 +361,28 @@ def setup(self, dtype, method):
398361
np.random.random(ngroups) * 10.0])
399362
elif dtype == 'object':
400363
key = ['foo'] * size
364+
elif dtype == 'datetime':
365+
key = date_range('1/1/2011', periods=size, freq='s')
401366

402367
df = DataFrame({'values': values, 'key': key})
403-
self.df_groupby_method = getattr(df.groupby('key')['values'], method)
404368

405-
def time_method(self, dtype, method):
406-
self.df_groupby_method()
369+
if application == 'transform':
370+
if method == 'describe':
371+
raise NotImplementedError
372+
373+
self.as_group_method = lambda: df.groupby(
374+
'key')['values'].transform(method)
375+
self.as_field_method = lambda: df.groupby(
376+
'values')['key'].transform(method)
377+
else:
378+
self.as_group_method = getattr(df.groupby('key')['values'], method)
379+
self.as_field_method = getattr(df.groupby('values')['key'], method)
380+
381+
def time_dtype_as_group(self, dtype, method, application):
382+
self.as_group_method()
383+
384+
def time_dtype_as_field(self, dtype, method, application):
385+
self.as_field_method()
407386

408387

409388
class Float32(object):

ci/requirements-3.6_NUMPY_DEV.build.sh

+1-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf
1212
pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy
1313

1414
# install dateutil from master
15-
# pip install -U git+git://github.com/dateutil/dateutil.git
16-
pip install dateutil
15+
pip install -U git+git://github.com/dateutil/dateutil.git
1716

1817
# cython via pip
1918
pip install cython

doc/source/contributing.rst

+6-8
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,9 @@ after updating.
262262
Contributing to the documentation
263263
=================================
264264

265-
If you're not the developer type, contributing to the documentation is still of
266-
huge value. You don't even have to be an expert on *pandas* to do so! In fact,
265+
Contributing to the documentation benefits everyone who uses *pandas*.
266+
We encourage you to help us improve the documentation, and
267+
you don't have to be an expert on *pandas* to do so! In fact,
267268
there are sections of the docs that are worse off after being written by
268269
experts. If something in the docs doesn't make sense to you, updating the
269270
relevant section after you figure it out is a great way to ensure it will help
@@ -292,12 +293,9 @@ Some other important things to know about the docs:
292293
overviews per topic together with some other information (what's new,
293294
installation, etc).
294295

295-
- The docstrings follow the **Numpy Docstring Standard**, which is used widely
296-
in the Scientific Python community. This standard specifies the format of
297-
the different sections of the docstring. See `this document
298-
<https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt>`_
299-
for a detailed explanation, or look at some of the existing functions to
300-
extend it in a similar manner.
296+
- The docstrings follow a pandas convention, based on the **Numpy Docstring
297+
Standard**. Follow the :ref:`pandas docstring guide <docstring>` for detailed
298+
instructions on how to write a correct docstring.
301299

302300
- The tutorials make heavy use of the `ipython directive
303301
<http://matplotlib.org/sampledoc/ipython_directive.html>`_ sphinx extension.

0 commit comments

Comments
 (0)