pandas-dev
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 1 addition & 4 deletions b/‎.pre-commit-config.yaml
Lines changed: 1 addition & 4 deletions
diff --git a/‎MANIFEST.in
Lines changed: 5 additions & 0 deletions b/‎MANIFEST.in
Lines changed: 5 additions & 0 deletions
diff --git a/‎Makefile
Lines changed: 1 addition & 1 deletion b/‎Makefile
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md
Lines changed: 7 additions & 6 deletions b/‎README.md
Lines changed: 7 additions & 6 deletions
diff --git a/‎asv_bench/asv.conf.json
Lines changed: 2 additions & 1 deletion b/‎asv_bench/asv.conf.json
Lines changed: 2 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/frame_methods.py
Lines changed: 11 additions & 0 deletions b/‎asv_bench/benchmarks/frame_methods.py
Lines changed: 11 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/io/excel.py
Lines changed: 54 additions & 22 deletions b/‎asv_bench/benchmarks/io/excel.py
Lines changed: 54 additions & 22 deletions
diff --git a/‎asv_bench/benchmarks/io/json.py
Lines changed: 2 additions & 2 deletions b/‎asv_bench/benchmarks/io/json.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎asv_bench/benchmarks/package.py
Lines changed: 25 additions & 0 deletions b/‎asv_bench/benchmarks/package.py
Lines changed: 25 additions & 0 deletions
@@ -57,6 +57,7 @@ dist
 # wheel files
 *.whl
 **/wheelhouse/*
+pip-wheel-metadata
 # coverage
 .coverage
 coverage.xml
 
@@ -15,7 +15,4 @@ repos:
     hooks:
     -   id: isort
         language: python_venv
--   repo: https://github.com/asottile/seed-isort-config
-    rev: v1.9.2
-    hooks:
-    -   id: seed-isort-config
+        exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
@@ -3,6 +3,7 @@ include LICENSE
 include RELEASE.md
 include README.md
 include setup.py
+include pyproject.toml
 
 graft doc
 prune doc/build
@@ -14,6 +15,7 @@ graft pandas
 global-exclude *.bz2
 global-exclude *.csv
 global-exclude *.dta
+global-exclude *.feather
 global-exclude *.gz
 global-exclude *.h5
 global-exclude *.html
@@ -23,7 +25,10 @@ global-exclude *.pickle
 global-exclude *.png
 global-exclude *.pyc
 global-exclude *.pyd
+global-exclude *.ods
+global-exclude *.odt
 global-exclude *.sas7bdat
+global-exclude *.sav
 global-exclude *.so
 global-exclude *.xls
 global-exclude *.xlsm
 
@@ -18,7 +18,7 @@ black:
 	black . --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|setup.py)'
 
 develop: build
-	python setup.py develop
+	python -m pip install --no-build-isolation -e .
 
 doc:
 	-rm -rf doc/build doc/source/generated
 
@@ -188,16 +188,17 @@ python setup.py install
 
 or for installing in [development mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs):
 
+
 ```sh
-python setup.py develop
+python -m pip install --no-build-isolation -e .
 ```
 
-Alternatively, you can use `pip` if you want all the dependencies pulled
-in automatically (the `-e` option is for installing it in [development
-mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs)):
+If you have `make`, you can also use `make develop` to run the same command.
+
+or alternatively
 
 ```sh
-pip install -e .
+python setup.py develop
 ```
 
 See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source).
@@ -224,7 +225,7 @@ Most development discussion is taking place on github in this repo. Further, the
 
 All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
 
-A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
+A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
 
 If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
 
 
@@ -50,12 +50,13 @@
         "xlsxwriter": [],
         "xlrd": [],
         "xlwt": [],
+        "odfpy": [],
         "pytest": [],
         // If using Windows with python 2.7 and want to build using the
         // mingw toolchain (rather than MSVC), uncomment the following line.
         // "libpython": [],
     },
-
+    "conda_channels": ["defaults", "conda-forge"],
     // Combinations of libraries/python versions can be excluded/included
     // from the set to test. Each entry is a dictionary containing additional
     // key-value pairs to include/exclude.
 
@@ -609,4 +609,15 @@ def time_dataframe_describe(self):
         self.df.describe()
 
 
+class SelectDtypes:  # benchmark for DataFrame.select_dtypes at two frame widths
+    params = [100, 1000]  # candidate values for n
+    param_names = ["n"]
+
+    def setup(self, n):
+        self.df = DataFrame(np.random.randn(10, n))  # 10 rows x n columns
+
+    def time_select_dtypes(self, n):
+        self.df.select_dtypes(include="int")  # result is discarded; only the call matters
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -1,40 +1,72 @@
 from io import BytesIO
 
 import numpy as np
+from odf.opendocument import OpenDocumentSpreadsheet
+from odf.table import Table, TableCell, TableRow
+from odf.text import P
 
 from pandas import DataFrame, ExcelWriter, date_range, read_excel
 import pandas.util.testing as tm
 
 
-class Excel:
+def _generate_dataframe():  # builds the frame shared by WriteExcel and ReadExcel
+    N = 2000  # number of rows
+    C = 5  # number of random-valued columns
+    df = DataFrame(
+        np.random.randn(N, C),
+        columns=["float{}".format(i) for i in range(C)],  # "float0" .. "float4"
+        index=date_range("20000101", periods=N, freq="H"),  # N periods, freq "H"
+    )
+    df["object"] = tm.makeStringIndex(N)  # extra column filled from makeStringIndex
+    return df
+
+
+class WriteExcel:
 
     params = ["openpyxl", "xlsxwriter", "xlwt"]
     param_names = ["engine"]
 
     def setup(self, engine):
-        N = 2000
-        C = 5
-        self.df = DataFrame(
-            np.random.randn(N, C),
-            columns=["float{}".format(i) for i in range(C)],
-            index=date_range("20000101", periods=N, freq="H"),
-        )
-        self.df["object"] = tm.makeStringIndex(N)
-        self.bio_read = BytesIO()
-        self.writer_read = ExcelWriter(self.bio_read, engine=engine)
-        self.df.to_excel(self.writer_read, sheet_name="Sheet1")
-        self.writer_read.save()
-        self.bio_read.seek(0)
-
-    def time_read_excel(self, engine):
-        read_excel(self.bio_read)
+        self.df = _generate_dataframe()
 
     def time_write_excel(self, engine):
-        bio_write = BytesIO()
-        bio_write.seek(0)
-        writer_write = ExcelWriter(bio_write, engine=engine)
-        self.df.to_excel(writer_write, sheet_name="Sheet1")
-        writer_write.save()
+        bio = BytesIO()
+        bio.seek(0)
+        writer = ExcelWriter(bio, engine=engine)
+        self.df.to_excel(writer, sheet_name="Sheet1")
+        writer.save()
+
+
+class ReadExcel:  # benchmarks read_excel once per engine listed in params
+
+    params = ["xlrd", "openpyxl", "odf"]
+    param_names = ["engine"]
+    fname_excel = "spreadsheet.xlsx"  # read by every engine except "odf"
+    fname_odf = "spreadsheet.ods"  # read by the "odf" engine
+
+    def _create_odf(self):  # writes self.df.values to fname_odf via odfpy
+        doc = OpenDocumentSpreadsheet()
+        table = Table(name="Table1")
+        for row in self.df.values:  # values only: index and column labels are not written
+            tr = TableRow()
+            for val in row:
+                tc = TableCell(valuetype="string")  # every cell is declared as a string
+                tc.addElement(P(text=val))
+                tr.addElement(tc)
+            table.addElement(tr)
+
+        doc.spreadsheet.addElement(table)
+        doc.save(self.fname_odf)
+
+    def setup_cache(self):  # creates both spreadsheet files read by time_read_excel
+        self.df = _generate_dataframe()
+
+        self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
+        self._create_odf()
+
+    def time_read_excel(self, engine):
+        fname = self.fname_odf if engine == "odf" else self.fname_excel  # file for this engine
+        read_excel(fname, engine=engine)
 
 
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -118,15 +118,15 @@ def setup(self, orient, frame):
     def time_to_json(self, orient, frame):
         getattr(self, frame).to_json(self.fname, orient=orient)
 
-    def mem_to_json(self, orient, frame):
+    def peakmem_to_json(self, orient, frame):
         getattr(self, frame).to_json(self.fname, orient=orient)
 
     def time_to_json_wide(self, orient, frame):
         base_df = getattr(self, frame).copy()
         df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1)
         df.to_json(self.fname, orient=orient)
 
-    def mem_to_json_wide(self, orient, frame):
+    def peakmem_to_json_wide(self, orient, frame):
         base_df = getattr(self, frame).copy()
         df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1)
         df.to_json(self.fname, orient=orient)
 
@@ -0,0 +1,25 @@
+"""
+Benchmarks for pandas at the package-level.
+"""
+import subprocess
+import sys
+
+from pandas.compat import PY37
+
+
+class TimeImport:  # runs `import pandas as pd` in a child interpreter
+    def time_import(self):
+        if PY37:
+            # on py37+ the "-X importtime" option gives us a more precise
+            #  measurement of the import time we actually care about,
+            #  without the subprocess or interpreter overhead
+            cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"]
+            p = subprocess.run(cmd, stderr=subprocess.PIPE)
+
+            line = p.stderr.splitlines()[-1]  # last line of the captured stderr
+            field = line.split(b"|")[-2].strip()  # second-to-last "|"-separated field
+            total = int(field)  # microseconds
+            return total  # NOTE(review): confirm asv uses the return value of a time_* method
+
+        cmd = [sys.executable, "-c", "import pandas as pd"]  # fallback: plain import
+        subprocess.run(cmd, stderr=subprocess.PIPE)