diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fb9d9dfa910..9ac671d5858 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -68,7 +68,15 @@ Enhancements - :py:meth:`pandas.Series.dropna` is now supported for a :py:class:`pandas.Series` indexed by a :py:class:`~xarray.CFTimeIndex` (:issue:`2688`). By `Spencer Clark `_. - +- :py:meth:`~xarray.open_dataset` now accepts a ``use_cftime`` argument, which + can be used to require that ``cftime.datetime`` objects are always used, or + never used when decoding dates encoded with a standard calendar. This can be + used to ensure consistent date types are returned when using + :py:meth:`~xarray.open_mfdataset` (:issue:`1263`) and/or to silence + serialization warnings raised if dates from a standard calendar are found to + be outside the :py:class:`pandas.Timestamp`-valid range (:issue:`2754`). By + `Spencer Clark `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e52f47a0841..61efcfdedf2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -161,7 +161,7 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, mask_and_scale=None, decode_times=True, autoclose=None, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, - backend_kwargs=None): + backend_kwargs=None, use_cftime=None): """Load and decode a dataset from a file or file-like object. Parameters @@ -231,6 +231,16 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, A dictionary of keyword arguments to pass on to the backend. This may be useful when backend options would improve performance or allow user control of dataset processing. + use_cftime: bool, optional + Only relevant if encoded dates come from a standard calendar + (e.g. 'gregorian', 'proleptic_gregorian', 'standard', or not + specified). 
If None (default), attempt to decode times to + ``np.datetime64[ns]`` objects; if this is not possible, decode times to + ``cftime.datetime`` objects. If True, always decode times to + ``cftime.datetime`` objects, regardless of whether or not they can be + represented using ``np.datetime64[ns]`` objects. If False, always + decode times to ``np.datetime64[ns]`` objects; if this is not possible + raise an error. Returns ------- @@ -269,7 +279,7 @@ def maybe_decode_store(store, lock=False): ds = conventions.decode_cf( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, - drop_variables=drop_variables) + drop_variables=drop_variables, use_cftime=use_cftime) _protect_dataset_variables_inplace(ds, cache) @@ -284,7 +294,8 @@ def maybe_decode_store(store, lock=False): mtime = None token = tokenize(filename_or_obj, mtime, group, decode_cf, mask_and_scale, decode_times, concat_characters, - decode_coords, engine, chunks, drop_variables) + decode_coords, engine, chunks, drop_variables, + use_cftime) name_prefix = 'open_dataset-%s' % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) ds2._file_obj = ds._file_obj @@ -360,7 +371,7 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, mask_and_scale=None, decode_times=True, autoclose=None, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, - backend_kwargs=None): + backend_kwargs=None, use_cftime=None): """Open an DataArray from a netCDF file containing a single data variable. This is designed to read netCDF files with only one data variable. If @@ -428,6 +439,16 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, A dictionary of keyword arguments to pass on to the backend. This may be useful when backend options would improve performance or allow user control of dataset processing. 
+ use_cftime: bool, optional + Only relevant if encoded dates come from a standard calendar + (e.g. 'gregorian', 'proleptic_gregorian', 'standard', or not + specified). If None (default), attempt to decode times to + ``np.datetime64[ns]`` objects; if this is not possible, decode times to + ``cftime.datetime`` objects. If True, always decode times to + ``cftime.datetime`` objects, regardless of whether or not they can be + represented using ``np.datetime64[ns]`` objects. If False, always + decode times to ``np.datetime64[ns]`` objects; if this is not possible + raise an error. Notes ----- @@ -450,7 +471,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, decode_coords=decode_coords, engine=engine, chunks=chunks, lock=lock, cache=cache, drop_variables=drop_variables, - backend_kwargs=backend_kwargs) + backend_kwargs=backend_kwargs, + use_cftime=use_cftime) if len(dataset.data_vars) != 1: raise ValueError('Given file dataset contains more than one data ' diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 459e9e0956d..02303a3edc3 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -80,32 +80,7 @@ def _unpack_netcdf_time_units(units): return delta_units, ref_date -def _decode_datetime_with_cftime(num_dates, units, calendar): - cftime = _import_cftime() - - if cftime.__name__ == 'cftime': - dates = np.asarray(cftime.num2date(num_dates, units, calendar, - only_use_cftime_datetimes=True)) - else: - # Must be using num2date from an old version of netCDF4 which - # does not have the only_use_cftime_datetimes option. 
- dates = np.asarray(cftime.num2date(num_dates, units, calendar)) - - if (dates[np.nanargmin(num_dates)].year < 1678 or - dates[np.nanargmax(num_dates)].year >= 2262): - if calendar in _STANDARD_CALENDARS: - warnings.warn( - 'Unable to decode time axis into full ' - 'numpy.datetime64 objects, continuing using dummy ' - 'cftime.datetime objects instead, reason: dates out ' - 'of range', SerializationWarning, stacklevel=3) - else: - if calendar in _STANDARD_CALENDARS: - dates = cftime_to_nptime(dates) - return dates - - -def _decode_cf_datetime_dtype(data, units, calendar): +def _decode_cf_datetime_dtype(data, units, calendar, use_cftime): # Verify that at least the first and last date can be decoded # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. @@ -115,7 +90,8 @@ def _decode_cf_datetime_dtype(data, units, calendar): last_item(values) or [0]]) try: - result = decode_cf_datetime(example_value, units, calendar) + result = decode_cf_datetime(example_value, units, calendar, + use_cftime) except Exception: calendar_msg = ('the default calendar' if calendar is None else 'calendar %r' % calendar) @@ -129,7 +105,52 @@ def _decode_cf_datetime_dtype(data, units, calendar): return dtype -def decode_cf_datetime(num_dates, units, calendar=None): +def _decode_datetime_with_cftime(num_dates, units, calendar): + cftime = _import_cftime() + + if cftime.__name__ == 'cftime': + return np.asarray(cftime.num2date(num_dates, units, calendar, + only_use_cftime_datetimes=True)) + else: + # Must be using num2date from an old version of netCDF4 which + # does not have the only_use_cftime_datetimes option. 
+ return np.asarray(cftime.num2date(num_dates, units, calendar)) + + +def _decode_datetime_with_pandas(flat_num_dates, units, calendar): + if calendar not in _STANDARD_CALENDARS: + raise OutOfBoundsDatetime( + 'Cannot decode times from a non-standard calendar, {!r}, using ' + 'pandas.'.format(calendar)) + + delta, ref_date = _unpack_netcdf_time_units(units) + delta = _netcdf_to_numpy_timeunit(delta) + try: + ref_date = pd.Timestamp(ref_date) + except ValueError: + # ValueError is raised by pd.Timestamp for non-ISO timestamp + # strings, in which case we fall back to using cftime + raise OutOfBoundsDatetime + + # fixes: https://github.com/pydata/pandas/issues/14068 + # these lines check if the lowest or the highest value in dates + # cause an OutOfBoundsDatetime (Overflow) error + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'invalid value encountered', + RuntimeWarning) + pd.to_timedelta(flat_num_dates.min(), delta) + ref_date + pd.to_timedelta(flat_num_dates.max(), delta) + ref_date + + # Cast input dates to integers of nanoseconds because `pd.to_datetime` + # works much faster when dealing with integers + # make _NS_PER_TIME_DELTA an array to ensure type upcasting + flat_num_dates_ns_int = (flat_num_dates.astype(np.float64) * + _NS_PER_TIME_DELTA[delta]).astype(np.int64) + + return (pd.to_timedelta(flat_num_dates_ns_int, 'ns') + ref_date).values + + +def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): + """Given an array of numeric dates in netCDF format, convert it into a + numpy array of date time objects. 
@@ -149,41 +170,30 @@ def decode_cf_datetime(num_dates, units, calendar=None): if calendar is None: calendar = 'standard' - delta, ref_date = _unpack_netcdf_time_units(units) - - try: - if calendar not in _STANDARD_CALENDARS: - raise OutOfBoundsDatetime - - delta = _netcdf_to_numpy_timeunit(delta) + if use_cftime is None: try: - ref_date = pd.Timestamp(ref_date) - except ValueError: - # ValueError is raised by pd.Timestamp for non-ISO timestamp - # strings, in which case we fall back to using cftime - raise OutOfBoundsDatetime - - # fixes: https://github.com/pydata/pandas/issues/14068 - # these lines check if the the lowest or the highest value in dates - # cause an OutOfBoundsDatetime (Overflow) error - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'invalid value encountered', - RuntimeWarning) - pd.to_timedelta(flat_num_dates.min(), delta) + ref_date - pd.to_timedelta(flat_num_dates.max(), delta) + ref_date - - # Cast input dates to integers of nanoseconds because `pd.to_datetime` - # works much faster when dealing with integers - # make _NS_PER_TIME_DELTA an array to ensure type upcasting - flat_num_dates_ns_int = (flat_num_dates.astype(np.float64) * - _NS_PER_TIME_DELTA[delta]).astype(np.int64) - - dates = (pd.to_timedelta(flat_num_dates_ns_int, 'ns') + - ref_date).values - - except (OutOfBoundsDatetime, OverflowError): + dates = _decode_datetime_with_pandas(flat_num_dates, units, + calendar) + except (OutOfBoundsDatetime, OverflowError): + dates = _decode_datetime_with_cftime( + flat_num_dates.astype(np.float), units, calendar) + + if (dates[np.nanargmin(num_dates)].year < 1678 or + dates[np.nanargmax(num_dates)].year >= 2262): + if calendar in _STANDARD_CALENDARS: + warnings.warn( + 'Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using ' + 'cftime.datetime objects instead, reason: dates out ' + 'of range', SerializationWarning, stacklevel=3) + else: + if calendar in _STANDARD_CALENDARS: 
+ dates = cftime_to_nptime(dates) + elif use_cftime: dates = _decode_datetime_with_cftime( flat_num_dates.astype(np.float), units, calendar) + else: + dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) return dates.reshape(num_dates.shape) @@ -383,6 +393,8 @@ def encode_cf_timedelta(timedeltas, units=None): class CFDatetimeCoder(VariableCoder): + def __init__(self, use_cftime=None): + self.use_cftime = use_cftime def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) @@ -403,9 +415,11 @@ def decode(self, variable, name=None): if 'units' in attrs and 'since' in attrs['units']: units = pop_to(attrs, encoding, 'units') calendar = pop_to(attrs, encoding, 'calendar') - dtype = _decode_cf_datetime_dtype(data, units, calendar) + dtype = _decode_cf_datetime_dtype(data, units, calendar, + self.use_cftime) transform = partial( - decode_cf_datetime, units=units, calendar=calendar) + decode_cf_datetime, units=units, calendar=calendar, + use_cftime=self.use_cftime) data = lazy_elemwise_func(data, transform, dtype) return Variable(dims, data, attrs, encoding) diff --git a/xarray/conventions.py b/xarray/conventions.py index c1c95a6b60e..5f41639e890 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -240,7 +240,7 @@ def encode_cf_variable(var, needs_copy=True, name=None): def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True, decode_times=True, decode_endianness=True, - stack_char_dim=True): + stack_char_dim=True, use_cftime=None): """ Decodes a variable which may hold CF encoded information. @@ -270,6 +270,16 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True, Whether to stack characters into bytes along the last dimension of this array. Passed as an argument because we need to look at the full dataset to figure out if this is appropriate. + use_cftime: bool, optional + Only relevant if encoded dates come from a standard calendar + (e.g. 
'gregorian', 'proleptic_gregorian', 'standard', or not + specified). If None (default), attempt to decode times to + ``np.datetime64[ns]`` objects; if this is not possible, decode times to + ``cftime.datetime`` objects. If True, always decode times to + ``cftime.datetime`` objects, regardless of whether or not they can be + represented using ``np.datetime64[ns]`` objects. If False, always + decode times to ``np.datetime64[ns]`` objects; if this is not possible + raise an error. Returns ------- @@ -292,7 +302,7 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True, if decode_times: for coder in [times.CFTimedeltaCoder(), - times.CFDatetimeCoder()]: + times.CFDatetimeCoder(use_cftime=use_cftime)]: var = coder.decode(var, name=name) dimensions, data, attributes, encoding = ( @@ -346,7 +356,8 @@ def _update_bounds_attributes(variables): def decode_cf_variables(variables, attributes, concat_characters=True, mask_and_scale=True, decode_times=True, - decode_coords=True, drop_variables=None): + decode_coords=True, drop_variables=None, + use_cftime=None): """ Decode several CF encoded variables. @@ -387,7 +398,7 @@ def stackable(dim): new_vars[k] = decode_cf_variable( k, v, concat_characters=concat_characters, mask_and_scale=mask_and_scale, decode_times=decode_times, - stack_char_dim=stack_char_dim) + stack_char_dim=stack_char_dim, use_cftime=use_cftime) if decode_coords: var_attrs = new_vars[k].attrs if 'coordinates' in var_attrs: @@ -406,7 +417,8 @@ def stackable(dim): def decode_cf(obj, concat_characters=True, mask_and_scale=True, - decode_times=True, decode_coords=True, drop_variables=None): + decode_times=True, decode_coords=True, drop_variables=None, + use_cftime=None): """Decode the given Dataset or Datastore according to CF conventions into a new Dataset. @@ -430,6 +442,16 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, A variable or list of variables to exclude from being parsed from the dataset. 
This may be useful to drop variables with problems or inconsistent values. + use_cftime: bool, optional + Only relevant if encoded dates come from a standard calendar + (e.g. 'gregorian', 'proleptic_gregorian', 'standard', or not + specified). If None (default), attempt to decode times to + ``np.datetime64[ns]`` objects; if this is not possible, decode times to + ``cftime.datetime`` objects. If True, always decode times to + ``cftime.datetime`` objects, regardless of whether or not they can be + represented using ``np.datetime64[ns]`` objects. If False, always + decode times to ``np.datetime64[ns]`` objects; if this is not possible + raise an error. Returns ------- @@ -454,7 +476,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, vars, attrs, coord_names = decode_cf_variables( vars, attrs, concat_characters, mask_and_scale, decode_times, - decode_coords, drop_variables=drop_variables) + decode_coords, drop_variables=drop_variables, use_cftime=use_cftime) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) ds._file_obj = file_obj diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 580cecb988b..f610dba1352 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -27,6 +27,7 @@ from xarray.core.options import set_options from xarray.core.pycompat import dask_array_type from xarray.tests import mock +from xarray.coding.variables import SerializationWarning from . 
import ( assert_allclose, assert_array_equal, assert_equal, assert_identical, @@ -35,6 +36,8 @@ requires_pathlib, requires_pseudonetcdf, requires_pydap, requires_pynio, requires_rasterio, requires_scipy, requires_scipy_or_netCDF4, requires_zarr) +from .test_coding_times import (_STANDARD_CALENDARS, _NON_STANDARD_CALENDARS, + _ALL_CALENDARS) from .test_dataset import create_test_data try: @@ -47,6 +50,12 @@ except ImportError: pass +try: + from pandas.errors import OutOfBoundsDatetime +except ImportError: + # pandas < 0.20 + from pandas.tslib import OutOfBoundsDatetime + ON_WINDOWS = sys.platform == 'win32' @@ -3536,3 +3545,170 @@ def test_source_encoding_always_present(): original.to_netcdf(tmp) with open_dataset(tmp) as ds: assert ds.encoding['source'] == tmp + + +@requires_scipy_or_netCDF4 +@pytest.mark.parametrize('calendar', _STANDARD_CALENDARS) +def test_use_cftime_standard_calendar_default_in_range(calendar): + x = [0, 1] + time = [0, 720] + units_date = '2000-01-01' + units = 'days since 2000-01-01' + original = DataArray(x, [('time', time)], name='x') + original = original.to_dataset() + for v in ['x', 'time']: + original[v].attrs['units'] = units + original[v].attrs['calendar'] = calendar + + x_timedeltas = np.array(x).astype('timedelta64[D]') + time_timedeltas = np.array(time).astype('timedelta64[D]') + decoded_x = np.datetime64(units_date, 'ns') + x_timedeltas + decoded_time = np.datetime64(units_date, 'ns') + time_timedeltas + expected_x = DataArray(decoded_x, [('time', decoded_time)], name='x') + expected_time = DataArray(decoded_time, [('time', decoded_time)], + name='time') + + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with pytest.warns(None) as record: + with open_dataset(tmp_file) as ds: + assert_identical(expected_x, ds.x) + assert_identical(expected_time, ds.time) + assert not record + + +@requires_cftime +@requires_scipy_or_netCDF4 +@pytest.mark.parametrize('calendar', _STANDARD_CALENDARS) 
+@pytest.mark.parametrize('units_year', [1500, 2500]) +def test_use_cftime_standard_calendar_default_out_of_range( + calendar, + units_year): + import cftime + + x = [0, 1] + time = [0, 720] + units = 'days since {}-01-01'.format(units_year) + original = DataArray(x, [('time', time)], name='x') + original = original.to_dataset() + for v in ['x', 'time']: + original[v].attrs['units'] = units + original[v].attrs['calendar'] = calendar + + decoded_x = cftime.num2date(x, units, calendar, + only_use_cftime_datetimes=True) + decoded_time = cftime.num2date(time, units, calendar, + only_use_cftime_datetimes=True) + expected_x = DataArray(decoded_x, [('time', decoded_time)], name='x') + expected_time = DataArray(decoded_time, [('time', decoded_time)], + name='time') + + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with pytest.warns(SerializationWarning): + with open_dataset(tmp_file) as ds: + assert_identical(expected_x, ds.x) + assert_identical(expected_time, ds.time) + + +@requires_cftime +@requires_scipy_or_netCDF4 +@pytest.mark.parametrize('calendar', _ALL_CALENDARS) +@pytest.mark.parametrize('units_year', [1500, 2000, 2500]) +def test_use_cftime_true( + calendar, + units_year): + import cftime + + x = [0, 1] + time = [0, 720] + units = 'days since {}-01-01'.format(units_year) + original = DataArray(x, [('time', time)], name='x') + original = original.to_dataset() + for v in ['x', 'time']: + original[v].attrs['units'] = units + original[v].attrs['calendar'] = calendar + + decoded_x = cftime.num2date(x, units, calendar, + only_use_cftime_datetimes=True) + decoded_time = cftime.num2date(time, units, calendar, + only_use_cftime_datetimes=True) + expected_x = DataArray(decoded_x, [('time', decoded_time)], name='x') + expected_time = DataArray(decoded_time, [('time', decoded_time)], + name='time') + + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with pytest.warns(None) as record: + with open_dataset(tmp_file, use_cftime=True) 
as ds: + assert_identical(expected_x, ds.x) + assert_identical(expected_time, ds.time) + assert not record + + +@requires_scipy_or_netCDF4 +@pytest.mark.parametrize('calendar', _STANDARD_CALENDARS) +def test_use_cftime_false_standard_calendar_in_range(calendar): + x = [0, 1] + time = [0, 720] + units_date = '2000-01-01' + units = 'days since 2000-01-01' + original = DataArray(x, [('time', time)], name='x') + original = original.to_dataset() + for v in ['x', 'time']: + original[v].attrs['units'] = units + original[v].attrs['calendar'] = calendar + + x_timedeltas = np.array(x).astype('timedelta64[D]') + time_timedeltas = np.array(time).astype('timedelta64[D]') + decoded_x = np.datetime64(units_date, 'ns') + x_timedeltas + decoded_time = np.datetime64(units_date, 'ns') + time_timedeltas + expected_x = DataArray(decoded_x, [('time', decoded_time)], name='x') + expected_time = DataArray(decoded_time, [('time', decoded_time)], + name='time') + + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with pytest.warns(None) as record: + with open_dataset(tmp_file, use_cftime=False) as ds: + assert_identical(expected_x, ds.x) + assert_identical(expected_time, ds.time) + assert not record + + +@requires_scipy_or_netCDF4 +@pytest.mark.parametrize('calendar', _STANDARD_CALENDARS) +@pytest.mark.parametrize('units_year', [1500, 2500]) +def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year): + x = [0, 1] + time = [0, 720] + units = 'days since {}-01-01'.format(units_year) + original = DataArray(x, [('time', time)], name='x') + original = original.to_dataset() + for v in ['x', 'time']: + original[v].attrs['units'] = units + original[v].attrs['calendar'] = calendar + + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with pytest.raises((OutOfBoundsDatetime, ValueError)): + open_dataset(tmp_file, use_cftime=False) + + +@requires_scipy_or_netCDF4 +@pytest.mark.parametrize('calendar', _NON_STANDARD_CALENDARS) 
+@pytest.mark.parametrize('units_year', [1500, 2000, 2500]) +def test_use_cftime_false_nonstandard_calendar(calendar, units_year): + x = [0, 1] + time = [0, 720] + units = 'days since {}-01-01'.format(units_year) + original = DataArray(x, [('time', time)], name='x') + original = original.to_dataset() + for v in ['x', 'time']: + original[v].attrs['units'] = units + original[v].attrs['calendar'] = calendar + + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with pytest.raises((OutOfBoundsDatetime, ValueError)): + open_dataset(tmp_file, use_cftime=False) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 863c0378835..d40abd4acc3 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,13 +8,20 @@ from xarray import DataArray, Variable, coding, decode_cf from xarray.coding.times import ( _import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime) +from xarray.coding.variables import SerializationWarning from xarray.conventions import _update_bounds_attributes from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal from . 
import ( assert_array_equal, has_cftime, has_cftime_or_netCDF4, has_dask, - requires_cftime_or_netCDF4) + requires_cftime_or_netCDF4, requires_cftime) + +try: + from pandas.errors import OutOfBoundsDatetime +except ImportError: + # pandas < 0.20 + from pandas.tslib import OutOfBoundsDatetime _NON_STANDARD_CALENDARS_SET = {'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'} @@ -781,3 +788,99 @@ def test_time_units_with_timezone_roundtrip(calendar): assert result_units == expected_units assert result_calendar == calendar + + +@pytest.mark.parametrize('calendar', _STANDARD_CALENDARS) +def test_use_cftime_default_standard_calendar_in_range(calendar): + numerical_dates = [0, 1] + units = 'days since 2000-01-01' + expected = pd.date_range('2000', periods=2) + + with pytest.warns(None) as record: + result = decode_cf_datetime(numerical_dates, units, calendar) + np.testing.assert_array_equal(result, expected) + assert not record + + +@requires_cftime +@pytest.mark.parametrize('calendar', _STANDARD_CALENDARS) +@pytest.mark.parametrize('units_year', [1500, 2500]) +def test_use_cftime_default_standard_calendar_out_of_range( + calendar, + units_year): + from cftime import num2date + + numerical_dates = [0, 1] + units = 'days since {}-01-01'.format(units_year) + expected = num2date(numerical_dates, units, calendar, + only_use_cftime_datetimes=True) + + with pytest.warns(SerializationWarning): + result = decode_cf_datetime(numerical_dates, units, calendar) + np.testing.assert_array_equal(result, expected) + + +@requires_cftime +@pytest.mark.parametrize('calendar', _NON_STANDARD_CALENDARS) +@pytest.mark.parametrize('units_year', [1500, 2000, 2500]) +def test_use_cftime_default_non_standard_calendar(calendar, units_year): + from cftime import num2date + + numerical_dates = [0, 1] + units = 'days since {}-01-01'.format(units_year) + expected = num2date(numerical_dates, units, calendar, + only_use_cftime_datetimes=True) + + with pytest.warns(None) as record: + result = 
decode_cf_datetime(numerical_dates, units, calendar) + np.testing.assert_array_equal(result, expected) + assert not record + + +@requires_cftime +@pytest.mark.parametrize('calendar', _ALL_CALENDARS) +@pytest.mark.parametrize('units_year', [1500, 2000, 2500]) +def test_use_cftime_true(calendar, units_year): + from cftime import num2date + + numerical_dates = [0, 1] + units = 'days since {}-01-01'.format(units_year) + expected = num2date(numerical_dates, units, calendar, + only_use_cftime_datetimes=True) + + with pytest.warns(None) as record: + result = decode_cf_datetime(numerical_dates, units, calendar, + use_cftime=True) + np.testing.assert_array_equal(result, expected) + assert not record + + +@pytest.mark.parametrize('calendar', _STANDARD_CALENDARS) +def test_use_cftime_false_standard_calendar_in_range(calendar): + numerical_dates = [0, 1] + units = 'days since 2000-01-01' + expected = pd.date_range('2000', periods=2) + + with pytest.warns(None) as record: + result = decode_cf_datetime(numerical_dates, units, calendar, + use_cftime=False) + np.testing.assert_array_equal(result, expected) + assert not record + + +@pytest.mark.parametrize('calendar', _STANDARD_CALENDARS) +@pytest.mark.parametrize('units_year', [1500, 2500]) +def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year): + numerical_dates = [0, 1] + units = 'days since {}-01-01'.format(units_year) + with pytest.raises(OutOfBoundsDatetime): + decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) + + +@pytest.mark.parametrize('calendar', _NON_STANDARD_CALENDARS) +@pytest.mark.parametrize('units_year', [1500, 2000, 2500]) +def test_use_cftime_false_non_standard_calendar(calendar, units_year): + numerical_dates = [0, 1] + units = 'days since {}-01-01'.format(units_year) + with pytest.raises(OutOfBoundsDatetime): + decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False)