Open dataset cf options conflict solving #3544

Open · wants to merge 2 commits into main
xarray/backends/api.py: 62 additions & 13 deletions
@@ -287,12 +287,12 @@ def load_dataarray(filename_or_obj, **kwargs):
 def open_dataset(
     filename_or_obj,
     group=None,
-    decode_cf=True,
+    decode_cf=None,
     mask_and_scale=None,
-    decode_times=True,
+    decode_times=None,
     autoclose=None,
-    concat_characters=True,
-    decode_coords=True,
+    concat_characters=None,
+    decode_coords=None,
     engine=None,
     chunks=None,
     lock=None,
@@ -316,19 +316,27 @@ def open_dataset(
         netCDF4 files).
     decode_cf : bool, optional
         Whether to decode these variables, assuming they were saved according
-        to CF conventions.
+        to CF conventions. Defaults to None, which means variables are
+        decoded while any of the arguments mask_and_scale, decode_times,
+        concat_characters, or decode_coords may still be deactivated
+        individually. If decode_cf is explicitly set to True or False, the
+        four options mask_and_scale, decode_times, concat_characters, and
+        decode_coords are set likewise. A ValueError is raised if decode_cf
+        conflicts with any of these four options set explicitly.
     mask_and_scale : bool, optional
         If True, replace array values equal to `_FillValue` with NA and scale
         values according to the formula `original_values * scale_factor +
         add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
         taken from variable attributes (if they exist). If the `_FillValue` or
         `missing_value` attribute contains multiple values a warning will be
         issued and all array values matching one of the multiple values will
-        be replaced by NA. mask_and_scale defaults to True except for the
-        pseudonetcdf backend.
+        be replaced by NA. mask_and_scale defaults to None, which means True
+        for every backend except pseudonetcdf.
     decode_times : bool, optional
         If True, decode times encoded in the standard NetCDF datetime format
         into datetime objects. Otherwise, leave them encoded as numbers.
+        Defaults to None, which means times are decoded unless decode_cf is
+        False, in which case they are left encoded as numbers.
     autoclose : bool, optional
         If True, automatically close files to avoid OS Error of too many files
         being open. However, this option doesn't work with streams, e.g.,
@@ -337,10 +345,13 @@ def open_dataset(
         If True, concatenate along the last dimension of character arrays to
         form string arrays. Dimensions will only be concatenated over (and
         removed) if they have no corresponding variable and if they are only
-        used as the last dimension of character arrays.
+        used as the last dimension of character arrays. Defaults to None,
+        which means characters are concatenated unless decode_cf is False,
+        in which case they are left unconcatenated.
     decode_coords : bool, optional
         If True, decode the 'coordinates' attribute to identify coordinates in
-        the resulting dataset.
+        the resulting dataset. Defaults to None, which means coordinates
+        are decoded unless decode_cf is False.
     engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'cfgrib', \
         'pseudonetcdf'}, optional
         Engine to use when reading files. If not provided, the default engine
@@ -424,15 +435,53 @@ def open_dataset(
             stacklevel=2,
         )
 
-    if mask_and_scale is None:
-        mask_and_scale = not engine == "pseudonetcdf"
-
-    if not decode_cf:
+    if decode_cf:
+        if mask_and_scale is not None and not mask_and_scale:
+            raise ValueError(
+                "cannot deactivate mask_and_scale if decode_cf=True is explicitly set"
+            )
+        if decode_times is not None and not decode_times:
+            raise ValueError(
+                "cannot deactivate decode_times if decode_cf=True is explicitly set"
+            )
+        if concat_characters is not None and not concat_characters:
+            raise ValueError(
+                "cannot deactivate concat_characters if "
+                "decode_cf=True is explicitly set"
+            )
+        if decode_coords is not None and not decode_coords:
+            raise ValueError(
+                "cannot deactivate decode_coords if decode_cf=True is explicitly set"
+            )
+    elif decode_cf is None:
+        # default: decode in general, but let individual options be turned off
+        decode_cf = True
+    elif not decode_cf:
+        if mask_and_scale:
+            raise ValueError("cannot use mask_and_scale if decode_cf=False")
         mask_and_scale = False
+        if decode_times:
+            raise ValueError("cannot use decode_times if decode_cf=False")
         decode_times = False
+        if concat_characters:
+            raise ValueError("cannot use concat_characters if decode_cf=False")
         concat_characters = False
+        if decode_coords:
+            raise ValueError("cannot use decode_coords if decode_cf=False")
         decode_coords = False
 
+    if mask_and_scale is None:
+        mask_and_scale = engine != "pseudonetcdf"
+
+    if decode_times is None:
+        decode_times = True
+
+    if concat_characters is None:
+        concat_characters = True
+
+    if decode_coords is None:
+        decode_coords = True
+
     if cache is None:
         cache = chunks is None
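Taken together, the resolution logic above gives decode_cf three states: None (the new default), True, and False. The sketch below illustrates the intended call-site behavior; it is an illustration only, assuming a local file example.nc, and is not part of the diff:

import xarray as xr

# decode_cf left at its new default of None: decoding is on, but any
# individual option can still be switched off without a conflict.
ds = xr.open_dataset("example.nc", decode_times=False)

# decode_cf=True set explicitly: deactivating an individual option now
# raises, e.g.
# ValueError: cannot deactivate decode_times if decode_cf=True is explicitly set
xr.open_dataset("example.nc", decode_cf=True, decode_times=False)

# decode_cf=False set explicitly: activating an individual option raises, e.g.
# ValueError: cannot use decode_times if decode_cf=False
xr.open_dataset("example.nc", decode_cf=False, decode_times=True)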
xarray/tests/test_backends.py: 136 additions & 0 deletions
@@ -979,6 +979,142 @@ def test_multiindex_not_implemented(self):
         with self.roundtrip(ds):
             pass
 
+    @requires_netCDF4
+    def test_open_dataset_decode_cf_mask_and_scale_conflicts(self):
+        with create_tmp_file() as tmp_file:
+            with nc4.Dataset(tmp_file, mode="w") as nc:
+                nc.createDimension("t", 5)
+                nc.createVariable("x", "int16", ("t",), fill_value=-1)
+                v = nc.variables["x"]
+                v.set_auto_maskandscale(False)
+                v.add_offset = 10
+                v.scale_factor = 0.1
+                v[:] = np.array([-1, -1, 0, 1, 2])
+
+            expected_decoded = create_masked_and_scaled_data()
+            expected_encoded = np.array([-1, -1, 0, 1, 2])
+
+            with open_dataset(tmp_file, decode_cf=True) as ds:
+                assert_identical(expected_decoded, ds)
+
+            with open_dataset(tmp_file, decode_cf=False) as ds:
+                assert_array_equal(expected_encoded, ds.x.values)
+
+            with open_dataset(tmp_file, decode_cf=None, mask_and_scale=True) as ds:
+                assert_identical(expected_decoded, ds)
+
+            with open_dataset(tmp_file, decode_cf=True, mask_and_scale=True) as ds:
+                assert_identical(expected_decoded, ds)
+
+            with open_dataset(tmp_file, decode_cf=False, mask_and_scale=False) as ds:
+                assert_array_equal(expected_encoded, ds.x.values)
+
+            with raises_regex(
+                ValueError, "cannot use mask_and_scale if decode_cf=False"
+            ):
+                open_dataset(tmp_file, decode_cf=False, mask_and_scale=True)
+
+            with raises_regex(
+                ValueError, "cannot deactivate mask_and_scale if decode_cf=True"
+            ):
+                open_dataset(tmp_file, decode_cf=True, mask_and_scale=False)
+
+    @requires_netCDF4
+    def test_open_dataset_decode_cf_decode_time_conflicts(self):
+        expected_decoded = np.dtype("<M8[ns]")
+        expected_encoded = np.dtype("int16")
+
+        with open_example_dataset("example_1.nc", decode_cf=True) as ds:
+            assert ds.time.dtype == expected_decoded
+
+        with open_example_dataset("example_1.nc", decode_cf=False) as ds:
+            assert ds.time.dtype == expected_encoded
+
+        with open_example_dataset(
+            "example_1.nc", decode_cf=None, decode_times=True
+        ) as ds:
+            assert ds.time.dtype == expected_decoded
+
+        with open_example_dataset(
+            "example_1.nc", decode_cf=True, decode_times=True
+        ) as ds:
+            assert ds.time.dtype == expected_decoded
+
+        with open_example_dataset(
+            "example_1.nc", decode_cf=False, decode_times=False
+        ) as ds:
+            assert ds.time.dtype == expected_encoded
+
+        with raises_regex(ValueError, "cannot use decode_times if decode_cf=False"):
+            open_example_dataset("example_1.nc", decode_cf=False, decode_times=True)
+
+        with raises_regex(
+            ValueError, "cannot deactivate decode_times if decode_cf=True"
+        ):
+            open_example_dataset("example_1.nc", decode_cf=True, decode_times=False)
+
+    @requires_netCDF4
+    def test_open_dataset_decode_cf_concat_characters_conflicts(self):
+        expected_decoded = np.dtype("|S4")
+        expected_encoded = np.dtype("|S1")
+
+        with open_example_dataset("bears.nc", decode_cf=True) as ds:
+            assert ds.bears.dtype == expected_decoded
+
+        with open_example_dataset("bears.nc", decode_cf=False) as ds:
+            assert ds.bears.dtype == expected_encoded
+
+        with open_example_dataset(
+            "bears.nc", decode_cf=None, concat_characters=True
+        ) as ds:
+            assert ds.bears.dtype == expected_decoded
+
+        with open_example_dataset(
+            "bears.nc", decode_cf=True, concat_characters=True
+        ) as ds:
+            assert ds.bears.dtype == expected_decoded
+
+        with open_example_dataset(
+            "bears.nc", decode_cf=False, concat_characters=False
+        ) as ds:
+            assert ds.bears.dtype == expected_encoded
+
+        with raises_regex(
+            ValueError, "cannot use concat_characters if decode_cf=False"
+        ):
+            open_example_dataset("bears.nc", decode_cf=False, concat_characters=True)
+
+        with raises_regex(
+            ValueError, "cannot deactivate concat_characters if decode_cf=True"
+        ):
+            open_example_dataset("bears.nc", decode_cf=True, concat_characters=False)
+
+    @requires_netCDF4
+    def test_open_dataset_decoding_conflicts(self):
+        expected_decoded = np.dtype("|S4")
+        expected_encoded = np.dtype("|S1")
+
+        # individual decoding options can be (de)activated without a ValueError
+        with open_example_dataset(
+            "bears.nc", concat_characters=True, decode_times=False
+        ) as ds:
+            assert ds.bears.dtype == expected_decoded
+
+        with open_example_dataset(
+            "bears.nc", concat_characters=True, decode_times=True
+        ) as ds:
+            assert ds.bears.dtype == expected_decoded
+
+        with open_example_dataset(
+            "bears.nc", concat_characters=False, decode_times=True
+        ) as ds:
+            assert ds.bears.dtype == expected_encoded
+
+        with open_example_dataset(
+            "bears.nc", concat_characters=False, decode_times=False
+        ) as ds:
+            assert ds.bears.dtype == expected_encoded
+
 
 _counter = itertools.count()
 
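All four new tests carry "conflicts" in their names, so they can be run in isolation with pytest's -k keyword filter; the exact selection string below is a suggestion, not part of the PR:

pytest xarray/tests/test_backends.py -k "conflicts"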