diff --git a/.travis.yml b/.travis.yml
index bd5cac8955c8d..4cbe7f86bd2fa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -73,6 +73,10 @@ matrix:
       env:
         - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
     # In allow_failures
+    - dist: trusty
+      env:
+        - JOB="3.6_ASV" ASV=true
+    # In allow_failures
     - dist: trusty
       env:
         - JOB="3.6_DOC" DOC=true
@@ -93,6 +97,9 @@ matrix:
     - dist: trusty
      env:
        - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
+    - dist: trusty
+      env:
+        - JOB="3.6_ASV" ASV=true
     - dist: trusty
       env:
         - JOB="3.6_DOC" DOC=true
@@ -128,6 +135,7 @@ script:
   - ci/script_single.sh
   - ci/script_multi.sh
   - ci/lint.sh
+  - ci/asv.sh
   - echo "checking imports"
   - source activate pandas && python ci/check_imports.py
   - echo "script done"
diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 45d62163ae80b..cccd38ef11251 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -1,3 +1,4 @@
+import warnings
 from importlib import import_module

 import numpy as np
@@ -83,7 +84,8 @@ def setup(self):
         self.all = self.uniques.repeat(10)

     def time_match_string(self):
-        pd.match(self.all, self.uniques)
+        with warnings.catch_warnings(record=True):
+            pd.match(self.all, self.uniques)


 class Hashing(object):
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 1613ca1b97f4b..7743921003353 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
@@ -119,11 +121,15 @@ def setup(self):

         self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
         self.s_str_cat = self.s_str.astype('category')
-        self.s_str_cat_ordered = self.s_str.astype('category', ordered=True)
+        with warnings.catch_warnings(record=True):
+            self.s_str_cat_ordered = self.s_str.astype('category',
+                                                       ordered=True)

         self.s_int = pd.Series(np.random.randint(0, ncats, size=N))
         self.s_int_cat = self.s_int.astype('category')
-        self.s_int_cat_ordered = self.s_int.astype('category', ordered=True)
+        with warnings.catch_warnings(record=True):
+            self.s_int_cat_ordered = self.s_int.astype('category',
+                                                       ordered=True)

     def time_rank_string(self):
         self.s_str.rank()
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index 4cecf12a27042..4ff71c706cd34 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -1,4 +1,6 @@
 import string
+import warnings
+
 import numpy as np
 import pandas.util.testing as tm
 from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
@@ -15,7 +17,8 @@ def setup(self):
         self.df = DataFrame(np.random.randn(10000, 25))
         self.df['foo'] = 'bar'
         self.df['bar'] = 'baz'
-        self.df = self.df.consolidate()
+        with warnings.catch_warnings(record=True):
+            self.df = self.df.consolidate()

     def time_frame_get_numeric_data(self):
         self.df._get_numeric_data()
@@ -141,8 +144,8 @@ class Repr(object):
     def setup(self):
         nrows = 10000
         data = np.random.randn(nrows, 10)
-        idx = MultiIndex.from_arrays(np.tile(np.random.randn(3, nrows / 100),
-                                             100))
+        arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
+        idx = MultiIndex.from_arrays(arrays)
         self.df3 = DataFrame(data, index=idx)
         self.df4 = DataFrame(data, index=np.random.randn(nrows))
         self.df_tall = DataFrame(np.random.randn(nrows, 10))
diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py
index 7d63d78084270..21c1ccf46e1c4 100644
--- a/asv_bench/benchmarks/gil.py
+++ b/asv_bench/benchmarks/gil.py
@@ -1,9 +1,13 @@
 import numpy as np
 import pandas.util.testing as tm
-from pandas import (DataFrame, Series, rolling_median, rolling_mean,
-                    rolling_min, rolling_max, rolling_var, rolling_skew,
-                    rolling_kurt, rolling_std, read_csv, factorize, date_range)
+from pandas import DataFrame, Series, read_csv, factorize, date_range
 from pandas.core.algorithms import take_1d
+try:
+    from pandas import (rolling_median, rolling_mean, rolling_min, rolling_max,
+                        rolling_var, rolling_skew, rolling_kurt, rolling_std)
+    have_rolling_methods = True
+except ImportError:
+    have_rolling_methods = False
 try:
     from pandas._libs import algos
 except ImportError:
@@ -171,8 +175,7 @@ def run(period):
 class ParallelRolling(object):

     goal_time = 0.2
-    params = ['rolling_median', 'rolling_mean', 'rolling_min', 'rolling_max',
-              'rolling_var', 'rolling_skew', 'rolling_kurt', 'rolling_std']
+    params = ['median', 'mean', 'min', 'max', 'var', 'skew', 'kurt', 'std']
     param_names = ['method']

     def setup(self, method):
@@ -181,34 +184,28 @@ def setup(self, method):
         win = 100
         arr = np.random.rand(100000)
         if hasattr(DataFrame, 'rolling'):
-            rolling = {'rolling_median': 'median',
-                       'rolling_mean': 'mean',
-                       'rolling_min': 'min',
-                       'rolling_max': 'max',
-                       'rolling_var': 'var',
-                       'rolling_skew': 'skew',
-                       'rolling_kurt': 'kurt',
-                       'rolling_std': 'std'}
             df = DataFrame(arr).rolling(win)

             @test_parallel(num_threads=2)
             def parallel_rolling():
-                getattr(df, rolling[method])()
+                getattr(df, method)()
             self.parallel_rolling = parallel_rolling
-        else:
-            rolling = {'rolling_median': rolling_median,
-                       'rolling_mean': rolling_mean,
-                       'rolling_min': rolling_min,
-                       'rolling_max': rolling_max,
-                       'rolling_var': rolling_var,
-                       'rolling_skew': rolling_skew,
-                       'rolling_kurt': rolling_kurt,
-                       'rolling_std': rolling_std}
+        elif have_rolling_methods:
+            rolling = {'median': rolling_median,
+                       'mean': rolling_mean,
+                       'min': rolling_min,
+                       'max': rolling_max,
+                       'var': rolling_var,
+                       'skew': rolling_skew,
+                       'kurt': rolling_kurt,
+                       'std': rolling_std}

             @test_parallel(num_threads=2)
             def parallel_rolling():
                 rolling[method](arr, win)
             self.parallel_rolling = parallel_rolling
+        else:
+            raise NotImplementedError

     def time_rolling(self, method):
         self.parallel_rolling()
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 4dfd215e6dc3a..8aa67d8bc6a6a 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -1,3 +1,4 @@
+import warnings
 from string import ascii_letters
 from itertools import product
 from functools import partial
@@ -340,7 +341,8 @@ def time_dt_size(self):
         self.df.groupby(['dates']).size()

     def time_dt_timegrouper_size(self):
-        self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
+        with warnings.catch_warnings(record=True):
+            self.df.groupby(TimeGrouper(key='dates', freq='M')).size()

     def time_category_size(self):
         self.draws.groupby(self.cats).size()
@@ -467,7 +469,7 @@ class SumMultiLevel(object):

     def setup(self):
         N = 50
-        self.df = DataFrame({'A': range(N) * 2,
+        self.df = DataFrame({'A': list(range(N)) * 2,
                              'B': range(N * 2),
                              'C': 1}).set_index(['A', 'B'])

diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index b35f00db2b054..77e013e1e4fb0 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 import pandas.util.testing as tm
 from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
@@ -91,7 +93,8 @@ def time_getitem_pos_slice(self, index):
         self.s[:80000]

     def time_get_value(self, index):
-        self.s.get_value(self.lbl)
+        with warnings.catch_warnings(record=True):
+            self.s.get_value(self.lbl)

     def time_getitem_scalar(self, index):
         self.s[self.lbl]
@@ -112,7 +115,8 @@ def setup(self):
         self.bool_obj_indexer = self.bool_indexer.astype(object)

     def time_get_value(self):
-        self.df.get_value(self.idx_scalar, self.col_scalar)
+        with warnings.catch_warnings(record=True):
+            self.df.get_value(self.idx_scalar, self.col_scalar)

     def time_ix(self):
         self.df.ix[self.idx_scalar, self.col_scalar]
@@ -231,11 +235,13 @@ class PanelIndexing(object):
     goal_time = 0.2

     def setup(self):
-        self.p = Panel(np.random.randn(100, 100, 100))
-        self.inds = range(0, 100, 10)
+        with warnings.catch_warnings(record=True):
+            self.p = Panel(np.random.randn(100, 100, 100))
+            self.inds = range(0, 100, 10)

     def time_subset(self):
-        self.p.ix[(self.inds, self.inds, self.inds)]
+        with warnings.catch_warnings(record=True):
+            self.p.ix[(self.inds, self.inds, self.inds)]


 class MethodLookup(object):
@@ -295,7 +301,8 @@ def setup(self):
     def time_insert(self):
         np.random.seed(1234)
         for i in range(100):
-            self.df.insert(0, i, np.random.randn(self.N))
+            self.df.insert(0, i, np.random.randn(self.N),
+                           allow_duplicates=True)

     def time_assign_with_setitem(self):
         np.random.seed(1234)
diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py
index 5c0e9586c1cb5..4b6e1d69af92d 100644
--- a/asv_bench/benchmarks/io/hdf.py
+++ b/asv_bench/benchmarks/io/hdf.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 from pandas import DataFrame, Panel, date_range, HDFStore, read_hdf
 import pandas.util.testing as tm
@@ -105,22 +107,25 @@ class HDFStorePanel(BaseIO):

     def setup(self):
         self.fname = '__test__.h5'
-        self.p = Panel(np.random.randn(20, 1000, 25),
-                       items=['Item%03d' % i for i in range(20)],
-                       major_axis=date_range('1/1/2000', periods=1000),
-                       minor_axis=['E%03d' % i for i in range(25)])
-        self.store = HDFStore(self.fname)
-        self.store.append('p1', self.p)
+        with warnings.catch_warnings(record=True):
+            self.p = Panel(np.random.randn(20, 1000, 25),
+                           items=['Item%03d' % i for i in range(20)],
+                           major_axis=date_range('1/1/2000', periods=1000),
+                           minor_axis=['E%03d' % i for i in range(25)])
+            self.store = HDFStore(self.fname)
+            self.store.append('p1', self.p)

     def teardown(self):
         self.store.close()
         self.remove(self.fname)

     def time_read_store_table_panel(self):
-        self.store.select('p1')
+        with warnings.catch_warnings(record=True):
+            self.store.select('p1')

     def time_write_store_table_panel(self):
-        self.store.append('p2', self.p)
+        with warnings.catch_warnings(record=True):
+            self.store.append('p2', self.p)


 class HDF(BaseIO):
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index 5b40a29d54683..de0a3b33da147 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -1,3 +1,4 @@
+import warnings
 import string

 import numpy as np
@@ -26,7 +27,8 @@ def setup(self):
         self.mdf1['obj2'] = 'bar'
         self.mdf1['int1'] = 5
         try:
-            self.mdf1.consolidate(inplace=True)
+            with warnings.catch_warnings(record=True):
+                self.mdf1.consolidate(inplace=True)
         except:
             pass
         self.mdf2 = self.mdf1.copy()
@@ -75,16 +77,23 @@ class ConcatPanels(object):
     param_names = ['axis', 'ignore_index']

     def setup(self, axis, ignore_index):
-        panel_c = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='C'))
-        self.panels_c = [panel_c] * 20
-        panel_f = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='F'))
-        self.panels_f = [panel_f] * 20
+        with warnings.catch_warnings(record=True):
+            panel_c = Panel(np.zeros((10000, 200, 2),
+                                     dtype=np.float32,
+                                     order='C'))
+            self.panels_c = [panel_c] * 20
+            panel_f = Panel(np.zeros((10000, 200, 2),
+                                     dtype=np.float32,
+                                     order='F'))
+            self.panels_f = [panel_f] * 20

     def time_c_ordered(self, axis, ignore_index):
-        concat(self.panels_c, axis=axis, ignore_index=ignore_index)
+        with warnings.catch_warnings(record=True):
+            concat(self.panels_c, axis=axis, ignore_index=ignore_index)

     def time_f_ordered(self, axis, ignore_index):
-        concat(self.panels_f, axis=axis, ignore_index=ignore_index)
+        with warnings.catch_warnings(record=True):
+            concat(self.panels_f, axis=axis, ignore_index=ignore_index)


 class ConcatDataFrames(object):
diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py
index 034e861e7fc01..e161b887ee86f 100644
--- a/asv_bench/benchmarks/offset.py
+++ b/asv_bench/benchmarks/offset.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import warnings
 from datetime import datetime

 import numpy as np
@@ -76,7 +77,8 @@ def setup(self, offset):
         self.data = pd.Series(rng)

     def time_add_offset(self, offset):
-        self.data + offset
+        with warnings.catch_warnings(record=True):
+            self.data + offset


 class OffsetDatetimeIndexArithmetic(object):
@@ -90,7 +92,8 @@ def setup(self, offset):
         self.data = pd.date_range(start='1/1/2000', periods=N, freq='T')

     def time_add_offset(self, offset):
-        self.data + offset
+        with warnings.catch_warnings(record=True):
+            self.data + offset


 class OffestDatetimeArithmetic(object):
diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py
index 456fe959c5aa3..ce946c76ed199 100644
--- a/asv_bench/benchmarks/panel_ctor.py
+++ b/asv_bench/benchmarks/panel_ctor.py
@@ -1,3 +1,4 @@
+import warnings
 from datetime import datetime, timedelta

 from pandas import DataFrame, DatetimeIndex, date_range
@@ -19,7 +20,8 @@ def setup(self):
             self.data_frames[x] = df

     def time_from_dict(self):
-        Panel.from_dict(self.data_frames)
+        with warnings.catch_warnings(record=True):
+            Panel.from_dict(self.data_frames)


 class SameIndexes(object):
@@ -34,7 +36,8 @@ def setup(self):
         self.data_frames = dict(enumerate([df] * 100))

     def time_from_dict(self):
-        Panel.from_dict(self.data_frames)
+        with warnings.catch_warnings(record=True):
+            Panel.from_dict(self.data_frames)


 class TwoIndexes(object):
@@ -53,4 +56,5 @@ def setup(self):
         self.data_frames = dict(enumerate(dfs))

     def time_from_dict(self):
-        Panel.from_dict(self.data_frames)
+        with warnings.catch_warnings(record=True):
+            Panel.from_dict(self.data_frames)
diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py
index 9ee1949b311db..a5b1a92e9cf67 100644
--- a/asv_bench/benchmarks/panel_methods.py
+++ b/asv_bench/benchmarks/panel_methods.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np

 from .pandas_vb_common import Panel, setup  # noqa
@@ -10,10 +12,13 @@ class PanelMethods(object):
     param_names = ['axis']

     def setup(self, axis):
-        self.panel = Panel(np.random.randn(100, 1000, 100))
+        with warnings.catch_warnings(record=True):
+            self.panel = Panel(np.random.randn(100, 1000, 100))

     def time_pct_change(self, axis):
-        self.panel.pct_change(1, axis=axis)
+        with warnings.catch_warnings(record=True):
+            self.panel.pct_change(1, axis=axis)

     def time_shift(self, axis):
-        self.panel.shift(1, axis=axis)
+        with warnings.catch_warnings(record=True):
+            self.panel.shift(1, axis=axis)
diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py
index 69a1a604b1ccc..413427a16f40b 100644
--- a/asv_bench/benchmarks/reindex.py
+++ b/asv_bench/benchmarks/reindex.py
@@ -167,10 +167,6 @@ def setup(self):
         col_array2 = col_array.copy()
         col_array2[:, :10000] = np.nan
         self.col_array_list = list(col_array)
-        self.col_array_list2 = list(col_array2)

     def time_lib_fast_zip(self):
         lib.fast_zip(self.col_array_list)
-
-    def time_lib_fast_zip_fillna(self):
-        lib.fast_zip_fillna(self.col_array_list2)
diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
index bd3b580d9d130..9044b080c45f9 100644
--- a/asv_bench/benchmarks/reshape.py
+++ b/asv_bench/benchmarks/reshape.py
@@ -104,9 +104,9 @@ def setup(self):
         self.letters = list('ABCD')
         yrvars = [l + str(num) for l, num in product(self.letters,
                                                      range(1, nyrs + 1))]
-
+        columns = [str(i) for i in range(nidvars)] + yrvars
         self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)),
-                            columns=list(range(nidvars)) + yrvars)
+                            columns=columns)
         self.df['id'] = self.df.index

     def time_wide_to_long_big(self):
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 4435327e1eb38..b203c8b0fa5c9 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 from pandas import Series
 import pandas.util.testing as tm
@@ -23,7 +25,8 @@ def time_endswith(self):
         self.s.str.endswith('A')

     def time_extract(self):
-        self.s.str.extract('(\\w*)A(\\w*)')
+        with warnings.catch_warnings(record=True):
+            self.s.str.extract('(\\w*)A(\\w*)')

     def time_findall(self):
         self.s.str.findall('[A-Z]+')
diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
index ea2f077f980d0..e1a6bc7a68e9d 100644
--- a/asv_bench/benchmarks/timeseries.py
+++ b/asv_bench/benchmarks/timeseries.py
@@ -1,3 +1,4 @@
+import warnings
 from datetime import timedelta

 import numpy as np
@@ -74,7 +75,8 @@ def setup(self):
                                               freq='S'))

     def time_infer_dst(self):
-        self.index.tz_localize('US/Eastern', infer_dst=True)
+        with warnings.catch_warnings(record=True):
+            self.index.tz_localize('US/Eastern', infer_dst=True)


 class ResetIndex(object):
@@ -365,7 +367,7 @@ class ToDatetimeCache(object):

     def setup(self, cache):
         N = 10000
-        self.unique_numeric_seconds = range(N)
+        self.unique_numeric_seconds = list(range(N))
         self.dup_numeric_seconds = [1000] * N
         self.dup_string_dates = ['2000-02-11'] * N
         self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * N
diff --git a/ci/asv.sh b/ci/asv.sh
new file mode 100755
index 0000000000000..1e9a8d6380eb5
--- /dev/null
+++ b/ci/asv.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+echo "inside $0"
+
+source activate pandas
+
+RET=0
+
+if [ "$ASV" ]; then
+    echo "Check for failed asv benchmarks"
+
+    cd asv_bench
+
+    asv machine --yes
+
+    time asv dev | tee failed_asv.txt
+
+    echo "The following asv benchmarks (if any) failed."
+
+    grep "failed" failed_asv.txt
+
+    if [ $? = "0" ]; then
+        RET=1
+    fi
+
+    echo "DONE displaying failed asv benchmarks."
+
+    rm failed_asv.txt
+
+    echo "Check for failed asv benchmarks DONE"
+else
+    echo "NOT checking for failed asv benchmarks"
+fi
+
+exit $RET
diff --git a/ci/requirements-3.6_ASV.build b/ci/requirements-3.6_ASV.build
new file mode 100644
index 0000000000000..bc72eed2a0d4e
--- /dev/null
+++ b/ci/requirements-3.6_ASV.build
@@ -0,0 +1,5 @@
+python=3.6*
+python-dateutil
+pytz
+numpy=1.13*
+cython
diff --git a/ci/requirements-3.6_ASV.run b/ci/requirements-3.6_ASV.run
new file mode 100644
index 0000000000000..6c45e3371e9cf
--- /dev/null
+++ b/ci/requirements-3.6_ASV.run
@@ -0,0 +1,25 @@
+ipython
+ipykernel
+ipywidgets
+sphinx=1.5*
+nbconvert
+nbformat
+notebook
+matplotlib
+seaborn
+scipy
+lxml
+beautifulsoup4
+html5lib
+pytables
+python-snappy
+openpyxl
+xlrd
+xlwt
+xlsxwriter
+sqlalchemy
+numexpr
+bottleneck
+statsmodels
+xarray
+pyqt
diff --git a/ci/requirements-3.6_ASV.sh b/ci/requirements-3.6_ASV.sh
new file mode 100755
index 0000000000000..8a46f85dbb6bc
--- /dev/null
+++ b/ci/requirements-3.6_ASV.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+source activate pandas
+
+echo "[install ASV_BUILD deps]"
+
+pip install git+https://github.com/spacetelescope/asv
diff --git a/ci/script_multi.sh b/ci/script_multi.sh
index c1fa756ece965..766e51625fbe6 100755
--- a/ci/script_multi.sh
+++ b/ci/script_multi.sh
@@ -37,6 +37,9 @@ if [ "$PIP_BUILD_TEST" ] || [ "$CONDA_BUILD_TEST" ]; then
 elif [ "$DOC" ]; then
     echo "We are not running pytest as this is a doc-build"

+elif [ "$ASV" ]; then
+    echo "We are not running pytest as this is an asv-build"
+
 elif [ "$COVERAGE" ]; then
     echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas
     pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas
diff --git a/ci/script_single.sh b/ci/script_single.sh
index 005c648ee025f..153847ab2e8c9 100755
--- a/ci/script_single.sh
+++ b/ci/script_single.sh
@@ -22,6 +22,9 @@ if [ "$PIP_BUILD_TEST" ] || [ "$CONDA_BUILD_TEST" ]; then
 elif [ "$DOC" ]; then
     echo "We are not running pytest as this is a doc-build"

+elif [ "$ASV" ]; then
+    echo "We are not running pytest as this is an asv-build"
+
 elif [ "$COVERAGE" ]; then
     echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
     pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas