From c96578fff9eea53182302d8315d5706a33ee9d4e Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 18 May 2019 21:20:19 -0600 Subject: [PATCH 1/6] More support for missing_value. Fixes #2871 --- doc/whats-new.rst | 2 ++ xarray/coding/variables.py | 15 ++++++++++++++- xarray/conventions.py | 3 ++- xarray/tests/test_coding.py | 19 ++++++++++++++++++- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ac1b5269bfa..e92f99c2651 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,8 @@ Bug fixes By `Deepak Cherian <https://github.com/dcherian>`_. +- Increased support for `missing_value` (:issue:`2871`) + By `Deepak Cherian <https://github.com/dcherian>`_. .. _whats-new.0.12.1: diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index ae8b97c7352..6a3c6caeacf 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -145,11 +145,24 @@ class CFMaskCoder(VariableCoder): def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) - if encoding.get('_FillValue') is not None: + fv = encoding.get('_FillValue') + mv = encoding.get('missing_value') + + if (fv is not None) and (mv is not None) and (fv != mv): + raise ValueError("Variable {!r} has multiple fill values {}. " + "Cannot encode data. 
" + .format(name, [fv, mv])) + + if fv is not None: fill_value = pop_to(encoding, attrs, '_FillValue', name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) + if mv is not None: + fill_value = pop_to(encoding, attrs, 'missing_value', name=name) + if not pd.isnull(fill_value): + data = duck_array_ops.fillna(data, fill_value) + return Variable(dims, data, attrs, encoding) def decode(self, variable, name=None): diff --git a/xarray/conventions.py b/xarray/conventions.py index 5f41639e890..eb84673eada 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -82,7 +82,8 @@ def maybe_encode_nonstring_dtype(var, name=None): if dtype != var.dtype: if np.issubdtype(dtype, np.integer): if (np.issubdtype(var.dtype, np.floating) and - '_FillValue' not in var.attrs): + '_FillValue' not in var.attrs and + 'missing_value' not in var.attrs): warnings.warn('saving variable %s with floating ' 'point data as an integer dtype without ' 'any _FillValue to use for NaNs' % name, diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 95c8ebc0b42..9f937ac7f5e 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -6,7 +6,7 @@ import xarray as xr from xarray.coding import variables -from . import assert_identical, requires_dask +from . 
import assert_equal, assert_identical, requires_dask with suppress(ImportError): import dask.array as da @@ -20,6 +20,23 @@ def test_CFMaskCoder_decode(): assert_identical(expected, encoded) +def test_CFMaskCoder_missing_value(): + expected = xr.DataArray(np.array([[26915, 27755, -9999, 27705], + [25595, -9999, 28315, -9999]]), + dims=['npts', 'ntimes'], + name='tmpk') + expected.attrs['missing_value'] = -9999 + + decoded = xr.decode_cf(expected.to_dataset()) + encoded, _ = xr.conventions.cf_encoder(decoded, decoded.attrs) + + assert_equal(encoded['tmpk'], expected.variable) + + decoded.tmpk.encoding['_FillValue'] = -9940 + with pytest.raises(ValueError): + encoded, _ = xr.conventions.cf_encoder(decoded, decoded.attrs) + + @requires_dask def test_CFMaskCoder_decode_dask(): original = xr.Variable(('x',), [0, -1, 1], {'_FillValue': -1}).chunk() From 8b1f364a8845f98ce7f174735146e7491d6311af Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 19 May 2019 14:24:01 -0600 Subject: [PATCH 2/6] lint fixes. 
--- xarray/coding/variables.py | 2 +- xarray/conventions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 6a3c6caeacf..280c7cb0cf4 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -1,7 +1,7 @@ """Coders for individual Variable objects.""" -from typing import Any import warnings from functools import partial +from typing import Any import numpy as np import pandas as pd diff --git a/xarray/conventions.py b/xarray/conventions.py index eb84673eada..bc32a5207fe 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -83,7 +83,7 @@ def maybe_encode_nonstring_dtype(var, name=None): if np.issubdtype(dtype, np.integer): if (np.issubdtype(var.dtype, np.floating) and '_FillValue' not in var.attrs and - 'missing_value' not in var.attrs): + 'missing_value' not in var.attrs): # noqa warnings.warn('saving variable %s with floating ' 'point data as an integer dtype without ' 'any _FillValue to use for NaNs' % name, From 15d94f3abd9e11e842bf2d8fc18ce173fb7ee6f2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 29 May 2019 08:59:00 -0600 Subject: [PATCH 3/6] Use not equivalent instead of not equals check. --- xarray/coding/variables.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 280c7cb0cf4..e21372f8d2c 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -8,6 +8,7 @@ from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type +from ..core.utils import equivalent from ..core.variable import Variable @@ -148,7 +149,7 @@ def encode(self, variable, name=None): fv = encoding.get('_FillValue') mv = encoding.get('missing_value') - if (fv is not None) and (mv is not None) and (fv != mv): + if fv is not None and mv is not None and not equivalent(fv, mv): raise ValueError("Variable {!r} has multiple fill values {}. 
" "Cannot encode data. " .format(name, [fv, mv])) From 3aa2d9bd7d5e20228339cd828c2aad42e1cd95fa Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 29 May 2019 09:20:11 -0600 Subject: [PATCH 4/6] lint fix. --- xarray/conventions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index bc32a5207fe..3f8f76b08a2 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -82,8 +82,8 @@ def maybe_encode_nonstring_dtype(var, name=None): if dtype != var.dtype: if np.issubdtype(dtype, np.integer): if (np.issubdtype(var.dtype, np.floating) and - '_FillValue' not in var.attrs and - 'missing_value' not in var.attrs): # noqa + '_FillValue' not in var.attrs and + 'missing_value' not in var.attrs): warnings.warn('saving variable %s with floating ' 'point data as an integer dtype without ' 'any _FillValue to use for NaNs' % name, From ae7ba6b339bcada7daa3f83b8f4be8606b4d82fc Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 29 May 2019 09:41:18 -0600 Subject: [PATCH 5/6] =?UTF-8?q?if=20=E2=86=92=20elif=20so=20we=20don't=20c?= =?UTF-8?q?all=20fillna=20twice?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index e21372f8d2c..6c85a847a9e 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -159,7 +159,7 @@ def encode(self, variable, name=None): if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) - if mv is not None: + elif mv is not None: fill_value = pop_to(encoding, attrs, 'missing_value', name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) From 4861e949e432811d6d6794683f33c8ce46eff1fe Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 29 May 2019 13:43:27 -0600 Subject: [PATCH 6/6] Better fix. 
--- xarray/coding/variables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 6c85a847a9e..c23e45e44de 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -159,9 +159,9 @@ def encode(self, variable, name=None): if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) - elif mv is not None: + if mv is not None: fill_value = pop_to(encoding, attrs, 'missing_value', name=name) - if not pd.isnull(fill_value): + if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) return Variable(dims, data, attrs, encoding)