Skip to content

Commit 9189c78

Browse files
authored
Merge branch 'master' into catDtype_copy_nan_codes
2 parents 567d48f + 148ed63 commit 9189c78

File tree

104 files changed

+4111
-2940
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

104 files changed

+4111
-2940
lines changed

.travis.yml

+12-12
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,6 @@ matrix:
5252
- dist: trusty
5353
env:
5454
- JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true
55-
addons:
56-
apt:
57-
packages:
58-
- xsel
5955
- dist: trusty
6056
env:
6157
- JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true
@@ -66,7 +62,11 @@ matrix:
6662
# In allow_failures
6763
- dist: trusty
6864
env:
69-
- JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
65+
- JOB="3.6_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
66+
addons:
67+
apt:
68+
packages:
69+
- xsel
7070
# In allow_failures
7171
- dist: trusty
7272
env:
@@ -75,17 +75,17 @@ matrix:
7575
- dist: trusty
7676
env:
7777
- JOB="3.6_DOC" DOC=true
78-
addons:
79-
apt:
80-
packages:
81-
- xsel
8278
allow_failures:
8379
- dist: trusty
8480
env:
8581
- JOB="2.7_SLOW" SLOW=true
8682
- dist: trusty
8783
env:
88-
- JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
84+
- JOB="3.6_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
85+
addons:
86+
apt:
87+
packages:
88+
- xsel
8989
- dist: trusty
9090
env:
9191
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
@@ -102,8 +102,6 @@ before_install:
102102
- uname -a
103103
- git --version
104104
- git tag
105-
- ci/before_install_travis.sh
106-
- export DISPLAY=":99.0"
107105

108106
install:
109107
- echo "install start"
@@ -114,6 +112,8 @@ install:
114112

115113
before_script:
116114
- ci/install_db_travis.sh
115+
- export DISPLAY=":99.0"
116+
- ci/before_script_travis.sh
117117

118118
script:
119119
- echo "script start"

asv_bench/benchmarks/timedelta.py

+43
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,46 @@ def setup(self):
4040

4141
def test_add_td_ts(self):
4242
self.td + self.ts
43+
44+
45+
class TimedeltaProperties(object):
46+
goal_time = 0.2
47+
48+
def setup(self):
49+
self.td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35)
50+
51+
def time_timedelta_days(self):
52+
self.td.days
53+
54+
def time_timedelta_seconds(self):
55+
self.td.seconds
56+
57+
def time_timedelta_microseconds(self):
58+
self.td.microseconds
59+
60+
def time_timedelta_nanoseconds(self):
61+
self.td.nanoseconds
62+
63+
64+
class DatetimeAccessor(object):
65+
goal_time = 0.2
66+
67+
def setup(self):
68+
self.N = 100000
69+
self.series = pd.Series(
70+
pd.timedelta_range('1 days', periods=self.N, freq='h')
71+
)
72+
def time_dt_accessor(self):
73+
self.series.dt
74+
75+
def time_timedelta_dt_accessor_days(self):
76+
self.series.dt.days
77+
78+
def time_timedelta_dt_accessor_seconds(self):
79+
self.series.dt.seconds
80+
81+
def time_timedelta_dt_accessor_microseconds(self):
82+
self.series.dt.microseconds
83+
84+
def time_timedelta_dt_accessor_nanoseconds(self):
85+
self.series.dt.nanoseconds

asv_bench/benchmarks/timeseries.py

+38-3
Original file line numberDiff line numberDiff line change
@@ -346,17 +346,22 @@ class ToDatetime(object):
346346

347347
def setup(self):
348348
self.rng = date_range(start='1/1/2000', periods=10000, freq='D')
349-
self.stringsD = Series((((self.rng.year * 10000) + (self.rng.month * 100)) + self.rng.day), dtype=np.int64).apply(str)
349+
self.stringsD = Series(self.rng.strftime('%Y%m%d'))
350350

351351
self.rng = date_range(start='1/1/2000', periods=20000, freq='H')
352-
self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng]
353-
self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng]
352+
self.strings = self.rng.strftime('%Y-%m-%d %H:%M:%S').tolist()
353+
self.strings_nosep = self.rng.strftime('%Y%m%d %H:%M:%S').tolist()
354354
self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800'
355355
for x in self.rng]
356356

357357
self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
358358
self.s2 = self.s.str.replace(':\\S+$', '')
359359

360+
self.unique_numeric_seconds = range(10000)
361+
self.dup_numeric_seconds = [1000] * 10000
362+
self.dup_string_dates = ['2000-02-11'] * 10000
363+
self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * 10000
364+
360365
def time_format_YYYYMMDD(self):
361366
to_datetime(self.stringsD, format='%Y%m%d')
362367

@@ -381,6 +386,36 @@ def time_format_exact(self):
381386
def time_format_no_exact(self):
382387
to_datetime(self.s, format='%d%b%y', exact=False)
383388

389+
def time_cache_true_with_unique_seconds_and_unit(self):
390+
to_datetime(self.unique_numeric_seconds, unit='s', cache=True)
391+
392+
def time_cache_false_with_unique_seconds_and_unit(self):
393+
to_datetime(self.unique_numeric_seconds, unit='s', cache=False)
394+
395+
def time_cache_true_with_dup_seconds_and_unit(self):
396+
to_datetime(self.dup_numeric_seconds, unit='s', cache=True)
397+
398+
def time_cache_false_with_dup_seconds_and_unit(self):
399+
to_datetime(self.dup_numeric_seconds, unit='s', cache=False)
400+
401+
def time_cache_true_with_dup_string_dates(self):
402+
to_datetime(self.dup_string_dates, cache=True)
403+
404+
def time_cache_false_with_dup_string_dates(self):
405+
to_datetime(self.dup_string_dates, cache=False)
406+
407+
def time_cache_true_with_dup_string_dates_and_format(self):
408+
to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=True)
409+
410+
def time_cache_false_with_dup_string_dates_and_format(self):
411+
to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=False)
412+
413+
def time_cache_true_with_dup_string_tzoffset_dates(self):
414+
to_datetime(self.dup_string_with_tz, cache=True)
415+
416+
def time_cache_false_with_dup_string_tzoffset_dates(self):
417+
to_datetime(self.dup_string_with_tz, cache=False)
418+
384419

385420
class Offsets(object):
386421
goal_time = 0.2

ci/before_install_travis.sh renamed to ci/before_script_travis.sh

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ echo "inside $0"
44

55
if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
66
sh -e /etc/init.d/xvfb start
7+
sleep 3
78
fi
89

910
# Never fail because bad things happened here.

ci/requirements-2.7_BUILD_TEST.build

-6
This file was deleted.

ci/requirements-3.6_BUILD_TEST.build

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
python=3.6*
2+
python-dateutil
3+
pytz
4+
nomkl
5+
numpy
6+
cython

ci/requirements-2.7_BUILD_TEST.sh renamed to ci/requirements-3.6_BUILD_TEST.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
source activate pandas
44

5-
echo "install 27 BUILD_TEST"
5+
echo "install 36 BUILD_TEST"
66

77
conda install -n pandas -c conda-forge pyarrow dask

ci/requirements-3.6_NUMPY_DEV.build.sh

+1-4
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,7 @@ PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf
1212
pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy
1313

1414
# install dateutil from master
15-
16-
# TODO(jreback), temp disable dateutil master has changed
17-
# pip install -U git+git://github.com/dateutil/dateutil.git
18-
pip install python-dateutil
15+
pip install -U git+git://github.com/dateutil/dateutil.git
1916

2017
# cython via pip
2118
pip install cython

ci/script_multi.sh

+5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ if [ "$BUILD_TEST" ]; then
2727
echo "[running]"
2828
cd /tmp
2929
unset PYTHONPATH
30+
31+
echo "[build-test: single]"
32+
python -c 'import pandas; pandas.test(["--skip-slow", "--skip-network", "-r xX", "-m single"])'
33+
34+
echo "[build-test: not single]"
3035
python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])'
3136

3237
elif [ "$DOC" ]; then

doc/source/api.rst

+44
Original file line numberDiff line numberDiff line change
@@ -1870,8 +1870,52 @@ Methods
18701870
Timedelta.to_timedelta64
18711871
Timedelta.total_seconds
18721872

1873+
.. _api.frequencies:
1874+
1875+
Frequencies
1876+
-----------
1877+
1878+
.. currentmodule:: pandas.tseries.frequencies
1879+
1880+
1881+
.. autosummary::
1882+
:toctree: generated/
1883+
1884+
to_offset
1885+
1886+
.. _api.offsets:
1887+
1888+
Offsets
1889+
-------
1890+
1891+
.. currentmodule:: pandas.tseries.offsets
1892+
1893+
.. autosummary::
1894+
:toctree: generated/
1895+
1896+
DateOffset
1897+
Week
1898+
Day
1899+
Hour
1900+
Minute
1901+
Second
1902+
Milli
1903+
Micro
1904+
Nano
1905+
1906+
.. autosummary::
1907+
:toctree: generated/
1908+
1909+
MonthBegin
1910+
MonthEnd
1911+
QuarterBegin
1912+
QuarterEnd
1913+
YearBegin
1914+
YearEnd
1915+
18731916
Window
18741917
------
1918+
18751919
.. currentmodule:: pandas.core.window
18761920

18771921
Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc.

doc/source/io.rst

+9-5
Original file line numberDiff line numberDiff line change
@@ -4427,8 +4427,10 @@ Several caveats.
44274427

44284428
- This is a newer library, and the format, though stable, is not guaranteed to be backward compatible
44294429
to the earlier versions.
4430-
- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
4431-
error if a non-default one is provided. You can simply ``.reset_index()`` in order to store the index.
4430+
- The format will NOT write an ``Index``, or ``MultiIndex`` for the
4431+
``DataFrame`` and will raise an error if a non-default one is provided. You
4432+
can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to
4433+
ignore it.
44324434
- Duplicate column names and non-string column names are not supported
44334435
- Unsupported types include ``Period`` and actual Python object types. These will raise a helpful error message
44344436
on an attempt at serialization.
@@ -4491,8 +4493,10 @@ dtypes, including extension dtypes such as datetime with tz.
44914493

44924494
Several caveats.
44934495

4494-
- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
4495-
error if a non-default one is provided. You can simply ``.reset_index(drop=True)`` in order to store the index.
4496+
- The format will NOT write an ``Index``, or ``MultiIndex`` for the
4497+
``DataFrame`` and will raise an error if a non-default one is provided. You
4498+
can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to
4499+
ignore it.
44964500
- Duplicate column names and non-string column names are not supported
44974501
- Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype.
44984502
- Unsupported types include ``Period`` and actual Python object types. These will raise a helpful error message
@@ -4538,7 +4542,7 @@ Read from a parquet file.
45384542
45394543
result.dtypes
45404544
4541-
Read only certain columns of a parquet file.
4545+
Read only certain columns of a parquet file.
45424546

45434547
.. ipython:: python
45444548

doc/source/release.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Highlights include:
5252
- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here <whatsnew_0210.enhancements.parquet>`.
5353
- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
5454
categoricals independent of the data, see :ref:`here <whatsnew_0210.enhancements.categorical_dtype>`.
55-
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
55+
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
5656
- Compatibility fixes for pypy, see :ref:`here <whatsnew_0210.pypy>`.
5757
- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here <whatsnew_0210.enhancements.drop_api>`.
5858
- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here <whatsnew_0210.enhancements.infer_objects>`) and ``GroupBy.pipe`` (see :ref:`here <whatsnew_0210.enhancements.GroupBy_pipe>`).

doc/source/whatsnew.rst

+4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ What's New
1818

1919
These are new features and improvements of note in each release.
2020

21+
.. include:: whatsnew/v0.22.0.txt
22+
23+
.. include:: whatsnew/v0.21.1.txt
24+
2125
.. include:: whatsnew/v0.21.0.txt
2226

2327
.. include:: whatsnew/v0.20.3.txt

0 commit comments

Comments
 (0)