Skip to content

Commit 71179bb

Browse files
committed
Merge remote-tracking branch 'upstream/master' into depr_str_cat_LLoLL
2 parents d6b8a68 + fa47b8d commit 71179bb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+603
-138
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ dist
6262
coverage.xml
6363
coverage_html_report
6464
*.pytest_cache
65+
# hypothesis test database
66+
.hypothesis/
6567

6668
# OS generated files #
6769
######################

ci/appveyor-27.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ dependencies:
2828
- pytest
2929
- pytest-xdist
3030
- moto
31+
- hypothesis>=3.58.0

ci/appveyor-36.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ dependencies:
2525
- cython>=0.28.2
2626
- pytest
2727
- pytest-xdist
28+
- hypothesis>=3.58.0

ci/check_imports.py

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
'html5lib',
1010
'ipython',
1111
'jinja2'
12+
'hypothesis',
1213
'lxml',
1314
'numexpr',
1415
'openpyxl',

ci/circle-27-compat.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77
- cython=0.28.2
88
- jinja2=2.8
99
- numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr
10-
- numpy=1.9.2
10+
- numpy=1.9.3
1111
- openpyxl
1212
- psycopg2
1313
- pytables=3.2.2
@@ -26,3 +26,4 @@ dependencies:
2626
- html5lib==1.0b2
2727
- beautifulsoup4==4.2.1
2828
- pymysql==0.6.0
29+
- hypothesis>=3.58.0

ci/circle-35-ascii.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ dependencies:
1111
# universal
1212
- pytest
1313
- pytest-xdist
14+
- pip:
15+
- hypothesis>=3.58.0

ci/circle-36-locale.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,5 @@ dependencies:
3131
- pytest
3232
- pytest-xdist
3333
- moto
34+
- pip:
35+
- hypothesis>=3.58.0

ci/circle-36-locale_slow.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,5 @@ dependencies:
3232
- pytest
3333
- pytest-xdist
3434
- moto
35+
- pip:
36+
- hypothesis>=3.58.0

ci/doctests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ if [ "$DOCTEST" ]; then
2828
fi
2929

3030
pytest --doctest-modules -v pandas/core/series.py \
31-
-k"-agg -map -nlargest -nonzero -nsmallest -reindex -searchsorted -to_dict"
31+
-k"-nlargest -nonzero -nsmallest -reindex -searchsorted -to_dict"
3232

3333
if [ $? -ne "0" ]; then
3434
RET=1

ci/environment-dev.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ dependencies:
77
- NumPy
88
- flake8
99
- flake8-comprehensions
10+
- hypothesis>=3.58.0
1011
- moto
1112
- pytest>=3.6
1213
- python-dateutil>=2.5.0

ci/requirements_dev.txt

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ Cython>=0.28.2
44
NumPy
55
flake8
66
flake8-comprehensions
7+
hypothesis>=3.58.0
78
moto
89
pytest>=3.6
910
python-dateutil>=2.5.0

ci/travis-27-locale.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77
- cython=0.28.2
88
- lxml
99
- matplotlib=1.4.3
10-
- numpy=1.9.2
10+
- numpy=1.9.3
1111
- openpyxl=2.4.0
1212
- python-dateutil
1313
- python-blosc
@@ -22,6 +22,7 @@ dependencies:
2222
# universal
2323
- pytest
2424
- pytest-xdist
25+
- hypothesis>=3.58.0
2526
- pip:
2627
- html5lib==1.0b2
2728
- beautifulsoup4==4.2.1

ci/travis-27.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ dependencies:
4545
- pytest
4646
- pytest-xdist
4747
- moto
48+
- hypothesis>=3.58.0
4849
- pip:
4950
- backports.lzma
5051
- cpplint

ci/travis-35-osx.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ dependencies:
2525
- pytest-xdist
2626
- pip:
2727
- python-dateutil==2.5.3
28+
- hypothesis>=3.58.0

ci/travis-36-doc.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- fastparquet
1111
- feather-format
1212
- html5lib
13+
- hypothesis>=3.58.0
1314
- ipykernel
1415
- ipython
1516
- ipywidgets

ci/travis-36-numpydev.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
# universal
99
- pytest
1010
- pytest-xdist
11+
- hypothesis>=3.58.0
1112
- pip:
1213
- "git+git://github.com/dateutil/dateutil.git"
1314
- "-f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"

ci/travis-36-slow.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ dependencies:
2828
- pytest
2929
- pytest-xdist
3030
- moto
31+
- hypothesis>=3.58.0

ci/travis-36.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dependencies:
4141
- pytest-xdist
4242
- pytest-cov
4343
- moto
44+
- hypothesis>=3.58.0
4445
- pip:
4546
- brotlipy
4647
- coverage

ci/travis-37.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ dependencies:
1212
- pytz
1313
- pytest
1414
- pytest-xdist
15+
- hypothesis>=3.58.0

doc/source/contributing.rst

+40
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,46 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex
820820
test_cool_feature.py::test_series[int8] PASSED
821821
822822
823+
.. _using-hypothesis:
824+
825+
Using ``hypothesis``
826+
~~~~~~~~~~~~~~~~~~~~
827+
828+
Hypothesis is a library for property-based testing. Instead of explicitly
829+
parametrizing a test, you can describe *all* valid inputs and let Hypothesis
830+
try to find a failing input. Even better, no matter how many random examples
831+
it tries, Hypothesis always reports a single minimal counterexample to your
832+
assertions - often an example that you would never have thought to test.
833+
834+
See `Getting Started with Hypothesis <https://hypothesis.works/articles/getting-started-with-hypothesis/>`_
835+
for more of an introduction, then `refer to the Hypothesis documentation
836+
for details <https://hypothesis.readthedocs.io/en/latest/index.html>`_.
837+
838+
.. code-block:: python
839+
840+
import json
841+
from hypothesis import given, strategies as st
842+
843+
any_json_value = st.deferred(lambda: st.one_of(
844+
st.none(), st.booleans(), st.floats(allow_nan=False), st.text(),
845+
st.lists(any_json_value), st.dictionaries(st.text(), any_json_value)
846+
))
847+
848+
@given(value=any_json_value)
849+
def test_json_roundtrip(value):
850+
result = json.loads(json.dumps(value))
851+
assert value == result
852+
853+
This test shows off several useful features of Hypothesis, as well as
854+
demonstrating a good use-case: checking properties that should hold over
855+
a large or complicated domain of inputs.
856+
857+
To keep the Pandas test suite running quickly, parametrized tests are
858+
preferred if the inputs or logic are simple, with Hypothesis tests reserved
859+
for cases with complex logic or where there are too many combinations of
860+
options or subtle interactions to test (or think of!) all of them.
861+
862+
823863
Running the test suite
824864
----------------------
825865

doc/source/install.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,8 @@ pandas is equipped with an exhaustive set of unit tests, covering about 97% of
202202
the code base as of this writing. To run it on your machine to verify that
203203
everything is working (and that you have all of the dependencies, soft and hard,
204204
installed), make sure you have `pytest
205-
<http://docs.pytest.org/en/latest/>`__ >= 3.6 and run:
205+
<http://docs.pytest.org/en/latest/>`__ >= 3.6 and `Hypothesis
206+
<https://hypothesis.readthedocs.io/>`__ >= 3.58, then run:
206207

207208
::
208209

doc/source/whatsnew/v0.24.0.txt

+10-5
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ without timezone localization. This is inconsistent from parsing the same
239239
datetime string with :class:`Timestamp` which would preserve the UTC
240240
offset in the ``tz`` attribute. Now, :func:`to_datetime` preserves the UTC
241241
offset in the ``tz`` attribute when all the datetime strings have the same
242-
UTC offset (:issue:`17697`, :issue:`11736`)
242+
UTC offset (:issue:`17697`, :issue:`11736`, :issue:`22457`)
243243

244244
*Previous Behavior*:
245245

@@ -447,6 +447,7 @@ ExtensionType Changes
447447
- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
448448
- The ``ExtensionArray`` constructor, ``_from_sequence``, now takes the keyword arg ``copy=False`` (:issue:`21185`)
449449
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
450+
- :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`)
450451
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
451452
- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
452453
- :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`).
@@ -583,12 +584,14 @@ Datetimelike
583584
- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`)
584585
- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
585586
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
586-
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
587-
-
588587

589588
Timedelta
590589
^^^^^^^^^
591-
590+
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
591+
- Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`)
592+
- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`)
593+
- Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
594+
- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`)
592595
-
593596
-
594597
-
@@ -633,6 +636,7 @@ Numeric
633636
a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly. (:issue:`16679`).
634637
- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`)
635638
- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`,:issue:`22163`)
639+
- Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`)
636640
-
637641

638642
Strings
@@ -721,12 +725,13 @@ Reshaping
721725
- Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`)
722726
- Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`)
723727
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
724-
-
728+
- Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`)
725729

726730
Build Changes
727731
^^^^^^^^^^^^^
728732

729733
- Building pandas for development now requires ``cython >= 0.28.2`` (:issue:`21688`)
734+
- Testing pandas now requires ``hypothesis>=3.58`` (:issue:`22280`). You can find `the Hypothesis docs here <https://hypothesis.readthedocs.io/en/latest/index.html>`_, and a pandas-specific introduction :ref:`in the contributing guide <using-hypothesis>`.
730735
-
731736

732737
Other

pandas/conftest.py

+34
Original file line numberDiff line numberDiff line change
@@ -450,3 +450,37 @@ def mock():
450450
return importlib.import_module("unittest.mock")
451451
else:
452452
return pytest.importorskip("mock")
453+
454+
455+
# ----------------------------------------------------------------
456+
# Global setup for tests using Hypothesis
457+
458+
from hypothesis import strategies as st
459+
460+
# Registering these strategies makes them globally available via st.from_type,
461+
# which is used for offsets in tests/tseries/offsets/test_offsets_properties.py
462+
for name in 'MonthBegin MonthEnd BMonthBegin BMonthEnd'.split():
463+
cls = getattr(pd.tseries.offsets, name)
464+
st.register_type_strategy(cls, st.builds(
465+
cls,
466+
n=st.integers(-99, 99),
467+
normalize=st.booleans(),
468+
))
469+
470+
for name in 'YearBegin YearEnd BYearBegin BYearEnd'.split():
471+
cls = getattr(pd.tseries.offsets, name)
472+
st.register_type_strategy(cls, st.builds(
473+
cls,
474+
n=st.integers(-5, 5),
475+
normalize=st.booleans(),
476+
month=st.integers(min_value=1, max_value=12),
477+
))
478+
479+
for name in 'QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd'.split():
480+
cls = getattr(pd.tseries.offsets, name)
481+
st.register_type_strategy(cls, st.builds(
482+
cls,
483+
n=st.integers(-24, 24),
484+
normalize=st.booleans(),
485+
startingMonth=st.integers(min_value=1, max_value=12)
486+
))

pandas/core/apply.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ def __init__(self, obj, func, broadcast, raw, reduce, result_type,
7171
self.result_type = result_type
7272

7373
# curry if needed
74-
if kwds or args and not isinstance(func, np.ufunc):
74+
if ((kwds or args) and
75+
not isinstance(func, (np.ufunc, compat.string_types))):
76+
7577
def f(x):
7678
return func(x, *args, **kwds)
7779
else:

pandas/core/arrays/base.py

+38
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ class ExtensionArray(object):
5959
* factorize / _values_for_factorize
6060
* argsort / _values_for_argsort
6161
62+
The remaining methods implemented on this class should be performant,
63+
as they only compose abstract methods. Still, a more efficient
64+
implementation may be available, and these methods can be overridden.
65+
6266
This class does not inherit from 'abc.ABCMeta' for performance reasons.
6367
Methods and properties required by the interface raise
6468
``pandas.errors.AbstractMethodError`` and no ``register`` method is
@@ -400,6 +404,40 @@ def dropna(self):
400404

401405
return self[~self.isna()]
402406

407+
def shift(self, periods=1):
408+
# type: (int) -> ExtensionArray
409+
"""
410+
Shift values by desired number.
411+
412+
Newly introduced missing values are filled with
413+
``self.dtype.na_value``.
414+
415+
.. versionadded:: 0.24.0
416+
417+
Parameters
418+
----------
419+
periods : int, default 1
420+
The number of periods to shift. Negative values are allowed
421+
for shifting backwards.
422+
423+
Returns
424+
-------
425+
shifted : ExtensionArray
426+
"""
427+
# Note: this implementation assumes that `self.dtype.na_value` can be
428+
# stored in an instance of your ExtensionArray with `self.dtype`.
429+
if periods == 0:
430+
return self.copy()
431+
empty = self._from_sequence([self.dtype.na_value] * abs(periods),
432+
dtype=self.dtype)
433+
if periods > 0:
434+
a = empty
435+
b = self[:-periods]
436+
else:
437+
a = self[abs(periods):]
438+
b = empty
439+
return self._concat_same_type([a, b])
440+
403441
def unique(self):
404442
"""Compute the ExtensionArray of unique values.
405443

0 commit comments

Comments
 (0)