Skip to content

Commit f0f8d83

Browse files
Support imputations with ndarray data
closes #4437
1 parent 823906a commit f0f8d83

File tree

3 files changed

+29
-14
lines changed

3 files changed

+29
-14
lines changed

RELEASE-NOTES.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# Release Notes
22

3-
## PyMC3 vNext (on deck)
3+
## PyMC3 vNext (3.11.1)
44

55
### Breaking Changes
66

77
### New Features
8+
+ Automatic imputations now also work with `ndarray` data, not just `pd.Series` or `pd.DataFrame` (see [#4439](https://github.com/pymc-devs/pymc3/pull/4439)).
89

910
### Maintenance
1011
- `math.log1mexp_numpy` no longer raises RuntimeWarning when given very small inputs. These were commonly observed during NUTS sampling (see [#4428](https://github.com/pymc-devs/pymc3/pull/4428)).

pymc3/model.py

+24-9
Original file line numberDiff line numberDiff line change
@@ -1695,16 +1695,31 @@ def pandas_to_array(data):
16951695
XXX: When `data` is a generator, this will return a Theano tensor!
16961696
16971697
"""
1698-
if hasattr(data, "values"): # pandas
1699-
if data.isnull().any().any(): # missing values
1700-
ret = np.ma.MaskedArray(data.values, data.isnull().values)
1698+
if hasattr(data, "to_numpy"):
1699+
# typically, but not limited to pandas objects
1700+
vals = data.to_numpy()
1701+
mask = np.isnan(vals)
1702+
if mask.any():
1703+
# there are missing values
1704+
ret = np.ma.MaskedArray(vals, mask)
17011705
else:
1702-
ret = data.values
1703-
elif hasattr(data, "mask"):
1704-
if data.mask.any():
1705-
ret = data
1706-
else: # empty mask
1707-
ret = data.filled()
1706+
ret = vals
1707+
elif isinstance(data, np.ndarray):
1708+
if isinstance(data, np.ma.MaskedArray):
1709+
if not data.mask.any():
1710+
# empty mask
1711+
ret = data.filled()
1712+
else:
1713+
# already masked and rightly so
1714+
ret = data
1715+
else:
1716+
# already a ndarray, but not masked
1717+
mask = np.isnan(data)
1718+
if np.any(mask):
1719+
ret = np.ma.MaskedArray(data, mask)
1720+
else:
1721+
# no masking required
1722+
ret = data
17081723
elif isinstance(data, theano.graph.basic.Variable):
17091724
ret = data
17101725
elif sps.issparse(data):

pymc3/tests/test_model_helpers.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,8 @@ def test_pandas_to_array(self, input_dtype):
4141
pandas_input = pd.DataFrame(dense_input)
4242

4343
# All the even numbers are replaced with NaN
44-
missing_pandas_input = pd.DataFrame(
45-
np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]])
46-
)
44+
missing_numpy_input = np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]])
45+
missing_pandas_input = pd.DataFrame(missing_numpy_input)
4746
masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0))
4847

4948
# Create a generator object. Apparently the generator object needs to
@@ -72,7 +71,7 @@ def test_pandas_to_array(self, input_dtype):
7271

7372
# Check function behavior when using masked array inputs and pandas
7473
# objects with missing data
75-
for input_value in [masked_array_input, missing_pandas_input]:
74+
for input_value in [missing_numpy_input, masked_array_input, missing_pandas_input]:
7675
func_output = func(input_value)
7776
assert isinstance(func_output, ma.core.MaskedArray)
7877
assert func_output.shape == input_value.shape

0 commit comments

Comments
 (0)