Skip to content

Commit 292fcdc

Browse files
committed
merge with master
2 parents ee55191 + 031fb16 commit 292fcdc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+979
-467
lines changed

.travis.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,6 @@ matrix:
5858
services:
5959
- mysql
6060
- postgresql
61-
62-
- env:
63-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
64-
services:
65-
- mysql
66-
- postgresql
6761
allow_failures:
6862
- arch: arm64
6963
env:

asv_bench/benchmarks/rolling.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,18 @@ class EWMMethods:
9191
def setup(self, constructor, window, dtype, method):
9292
N = 10 ** 5
9393
arr = (100 * np.random.random(N)).astype(dtype)
94+
times = pd.date_range("1900", periods=N, freq="23s")
9495
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
96+
self.ewm_times = getattr(pd, constructor)(arr).ewm(
97+
halflife="1 Day", times=times
98+
)
9599

96100
def time_ewm(self, constructor, window, dtype, method):
97101
getattr(self.ewm, method)()
98102

103+
def time_ewm_times(self, constructor, window, dtype, method):
104+
self.ewm_times.mean()
105+
99106

100107
class VariableWindowMethods(Methods):
101108
params = (
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
ipython analogue:
3+
4+
tr = TimeResolution()
5+
mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],))
6+
df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"])
7+
8+
for unit in tr.params[0]:
9+
for size in tr.params[1]:
10+
for tz in tr.params[2]:
11+
tr.setup(unit, size, tz)
12+
key = (unit, size, str(tz))
13+
print(key)
14+
15+
val = %timeit -o tr.time_get_resolution(unit, size, tz)
16+
17+
df.loc[key] = (val.average, val.stdev)
18+
19+
"""
20+
from datetime import timedelta, timezone
21+
22+
from dateutil.tz import gettz, tzlocal
23+
import numpy as np
24+
import pytz
25+
26+
from pandas._libs.tslibs.resolution import get_resolution
27+
28+
29+
class TimeResolution:
30+
params = (
31+
["D", "h", "m", "s", "us", "ns"],
32+
[1, 100, 10 ** 4, 10 ** 6],
33+
[
34+
None,
35+
timezone.utc,
36+
timezone(timedelta(minutes=60)),
37+
pytz.timezone("US/Pacific"),
38+
gettz("Asia/Tokyo"),
39+
tzlocal(),
40+
],
41+
)
42+
param_names = ["unit", "size", "tz"]
43+
44+
def setup(self, unit, size, tz):
45+
arr = np.random.randint(0, 10, size=size, dtype="i8")
46+
arr = arr.view(f"M8[{unit}]").astype("M8[ns]").view("i8")
47+
self.i8data = arr
48+
49+
def time_get_resolution(self, unit, size, tz):
50+
get_resolution(self.i8data, tz)

asv_bench/benchmarks/tslibs/tslib.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""
2+
ipython analogue:
3+
4+
tr = TimeIntsToPydatetime()
5+
mi = pd.MultiIndex.from_product(
6+
tr.params[:-1] + ([str(x) for x in tr.params[-1]],)
7+
)
8+
df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"])
9+
for box in tr.params[0]:
10+
for size in tr.params[1]:
11+
for tz in tr.params[2]:
12+
tr.setup(box, size, tz)
13+
key = (box, size, str(tz))
14+
print(key)
15+
val = %timeit -o tr.time_ints_to_pydatetime(box, size, tz)
16+
df.loc[key] = (val.average, val.stdev)
17+
"""
18+
from datetime import timedelta, timezone
19+
20+
from dateutil.tz import gettz, tzlocal
21+
import numpy as np
22+
import pytz
23+
24+
from pandas._libs.tslib import ints_to_pydatetime
25+
26+
_tzs = [
27+
None,
28+
timezone.utc,
29+
timezone(timedelta(minutes=60)),
30+
pytz.timezone("US/Pacific"),
31+
gettz("Asia/Tokyo"),
32+
tzlocal(),
33+
]
34+
_sizes = [0, 1, 100, 10 ** 4, 10 ** 6]
35+
36+
37+
class TimeIntsToPydatetime:
38+
params = (
39+
["time", "date", "datetime", "timestamp"],
40+
_sizes,
41+
_tzs,
42+
)
43+
param_names = ["box", "size", "tz"]
44+
# TODO: fold? freq?
45+
46+
def setup(self, box, size, tz):
47+
arr = np.random.randint(0, 10, size=size, dtype="i8")
48+
self.i8data = arr
49+
50+
def time_ints_to_pydatetime(self, box, size, tz):
51+
if box == "date":
52+
# ints_to_pydatetime does not allow non-None tz with date;
53+
# this will mean doing some duplicate benchmarks
54+
tz = None
55+
ints_to_pydatetime(self.i8data, tz, box=box)
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import numpy as np
2+
from pytz import UTC
3+
4+
from pandas._libs.tslibs.tzconversion import tz_convert, tz_localize_to_utc
5+
6+
from .tslib import _sizes, _tzs
7+
8+
9+
class TimeTZConvert:
10+
params = (
11+
_sizes,
12+
[x for x in _tzs if x is not None],
13+
)
14+
param_names = ["size", "tz"]
15+
16+
def setup(self, size, tz):
17+
arr = np.random.randint(0, 10, size=size, dtype="i8")
18+
self.i8data = arr
19+
20+
def time_tz_convert_from_utc(self, size, tz):
21+
# effectively:
22+
# dti = DatetimeIndex(self.i8data, tz=tz)
23+
# dti.tz_localize(None)
24+
tz_convert(self.i8data, UTC, tz)
25+
26+
def time_tz_localize_to_utc(self, size, tz):
27+
# effectively:
28+
# dti = DatetimeIndex(self.i8data)
29+
# dti.tz_localize(tz, ambiguous="NaT", nonexistent="NaT")
30+
tz_localize_to_utc(self.i8data, tz, ambiguous="NaT", nonexistent="NaT")

ci/azure/posix.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ jobs:
3030
LC_ALL: "zh_CN.utf8"
3131
EXTRA_APT: "language-pack-zh-hans"
3232

33+
py36_slow:
34+
ENV_FILE: ci/deps/azure-36-slow.yaml
35+
CONDA_PY: "36"
36+
PATTERN: "slow"
37+
3338
py36_locale:
3439
ENV_FILE: ci/deps/azure-36-locale.yaml
3540
CONDA_PY: "36"
File renamed without changes.

ci/setup_env.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,5 +166,4 @@ if [[ -n ${SQL:0} ]]; then
166166
else
167167
echo "not using dbs on non-linux Travis builds or Azure Pipelines"
168168
fi
169-
170169
echo "done"

doc/source/user_guide/computation.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,25 @@ and **alpha** to the EW functions:
10951095
one half.
10961096
* **Alpha** specifies the smoothing factor directly.
10971097

1098+
.. versionadded:: 1.1.0
1099+
1100+
You can also specify ``halflife`` in terms of a timedelta convertible unit to specify the amount of
1101+
time it takes for an observation to decay to half its value when also specifying a sequence
1102+
of ``times``.
1103+
1104+
.. ipython:: python
1105+
1106+
df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
1107+
df
1108+
times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']
1109+
df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean()
1110+
1111+
The following formula is used to compute exponentially weighted mean with an input vector of times:
1112+
1113+
.. math::
1114+
1115+
y_t = \frac{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda} x_{i}}{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda}},
1116+
10981117
Here is an example for a univariate time series:
10991118

11001119
.. ipython:: python

doc/source/whatsnew/v1.1.0.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,8 @@ Other enhancements
328328
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
329329
- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
330330
- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similarly to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
331+
- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable boolean dtype (:issue:`34859`)
332+
- :class:`pandas.core.window.ExponentialMovingWindow` now supports a ``times`` argument that allows ``mean`` to be calculated with observations spaced by the timestamps in ``times`` (:issue:`34839`)
331333

332334
.. ---------------------------------------------------------------------------
333335
@@ -787,6 +789,7 @@ Deprecations
787789
- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
788790
- The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
789791
- Providing ``suffixes`` as a ``set`` in :func:`pandas.merge` is deprecated. Provide a tuple instead (:issue:`33740`, :issue:`34741`).
792+
- Indexing a series with a multi-dimensional indexer like ``[:, None]`` to return an ndarray now raises a ``FutureWarning``. Convert to a NumPy array before indexing instead (:issue:`27837`)
790793
- :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`)
791794

792795
- Passing any arguments but the first one to :func:`read_html` as
@@ -818,6 +821,7 @@ Deprecations
818821
- :meth:`util.testing.assert_almost_equal` now accepts both relative and absolute
819822
precision through the ``rtol``, and ``atol`` parameters, thus deprecating the
820823
``check_less_precise`` parameter. (:issue:`13357`).
824+
- :meth:`DataFrame.melt` accepting a ``value_name`` that matches an existing column label is deprecated, and will be removed in a future version (:issue:`34731`)
821825

822826
.. ---------------------------------------------------------------------------
823827
@@ -974,6 +978,7 @@ Indexing
974978
- Bug in :meth:`DataFrame.loc` with dictionary of values changes columns with dtype of ``int`` to ``float`` (:issue:`34573`)
975979
- Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an IndexingError when accessing a None value (:issue:`34318`)
976980
- Bug in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` would not preserve data types on an empty :class:`DataFrame` or :class:`Series` with a :class:`MultiIndex` (:issue:`19602`)
981+
- Bug in :class:`Series` and :class:`DataFrame` indexing with a ``time`` key on a :class:`DatetimeIndex` with ``NaT`` entries (:issue:`35114`)
977982

978983
Missing
979984
^^^^^^^

environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ dependencies:
2020
- flake8<3.8.0 # temporary pin, GH#34150
2121
- flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions
2222
- flake8-rst>=0.6.0,<=0.7.0 # linting of code blocks in rst files
23-
- isort # check that imports are in the right order
23+
- isort=4.3.21 # check that imports are in the right order
2424
- mypy=0.730
2525
- pycodestyle # used by flake8
2626

2727
# documentation
2828
- gitpython # obtain contributors from git for whatsnew
2929
- gitdb2=2.0.6 # GH-32060
30-
- sphinx
30+
- sphinx<=3.1.1
3131

3232
# documentation (jupyter notebooks)
3333
- nbconvert>=5.4.1

pandas/_libs/lib.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ from pandas._libs.tslibs.nattype cimport (
7373
)
7474
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
7575
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
76-
from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare
76+
from pandas._libs.tslibs.timezones cimport tz_compare
7777
from pandas._libs.tslibs.period cimport is_period_object
7878
from pandas._libs.tslibs.offsets cimport is_offset_object
7979

@@ -1789,7 +1789,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool:
17891789
for i in range(n):
17901790
base_val = values[i]
17911791
if base_val is not NaT:
1792-
base_tz = get_timezone(getattr(base_val, 'tzinfo', None))
1792+
base_tz = getattr(base_val, 'tzinfo', None)
17931793
break
17941794

17951795
for j in range(i, n):

0 commit comments

Comments
 (0)