Fixing GH1434 xr.concat loses coordinate dtype information with recarrays in 0.9 #1438

Closed (wants to merge 15 commits)
Changes from 11 commits
5 changes: 5 additions & 0 deletions doc/whats-new.rst
@@ -624,13 +624,18 @@ Enhancements
Bug fixes
~~~~~~~~~

- Fixed loss of dimension coordinate dtype during ``concat`` operations
(:issue:`1434`). By
`Maciek Swat <https://github.com/maciekswat>`_.

- Attributes were being retained by default for some resampling
operations when they should not. With the ``keep_attrs=False`` option, they
will no longer be retained by default. This may be backwards-incompatible
with some scripts, but the attributes may be kept by adding the
``keep_attrs=True`` option. By
`Jeremy McGibbon <https://github.com/mcgibbon>`_.

- Fixed bug in arithmetic operations on DataArray objects whose dimensions
are numpy structured arrays or recarrays (:issue:`861`, :issue:`837`).
Member: This looks like a different fix?

Author: fixed
- Concatenating xarray objects along an axis with a MultiIndex or PeriodIndex
preserves the nature of the index (:issue:`875`). By
`Stephan Hoyer <https://github.com/shoyer>`_.
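For readers unfamiliar with the coordinates involved, here is a minimal numpy-only sketch of the structured arrays ("recarrays") these entries refer to; the values are hypothetical:

```python
import numpy as np

# A structured array like the 'participant' coordinate in the tests below.
p = np.array([('A', 180), ('B', 150)],
             dtype=[('name', '|S256'), ('height', int)])

print(p['name'])   # access a field by name
print(p.dtype)     # the compound dtype that GH1434 was losing on concat
```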
4 changes: 4 additions & 0 deletions xarray/core/alignment.py
@@ -103,6 +103,10 @@ def align(*objects, **kwargs):
for dim in obj.dims:
if dim not in exclude:
try:
# GH1434
# dtype is lost after obj.indexes[dim]
# problem originates in Indexes.__getitem__
# in coordinates.py
Member: I'm not sure why you're adding these comments here?

Author: Removed those

index = obj.indexes[dim]
except KeyError:
unlabeled_dim_sizes[dim].add(obj.sizes[dim])
21 changes: 21 additions & 0 deletions xarray/core/combine.py
@@ -207,8 +207,29 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):

dim, coord = _calc_concat_dim_coord(dim)
datasets = [as_dataset(ds) for ds in datasets]

# GH1434
# constructing a dictionary that will be used to preserve dtype
# of the original dataset dimensions
dtype_dict = {}
for ds in datasets:
for dim_name in ds.dims:
if dim_name != dim:
dtype_dict[dim_name] = ds[dim_name].dtype

# align loses original dtype of the datasets' dim variables
datasets = align(*datasets, join='outer', copy=False, exclude=[dim])

# GH1434
# restoring original dtype of the datasets' dimensions
Member: This fix should either be done in align (actually inside core.alignment.reindex_variables) or not at all. We cannot preserve all dtypes in alignment operations, e.g., there is no missing value for structured dtypes. If you care about this, you should use join='inner' or join='exact' (will appear in the next release, from #1330). It would be nice to surface these options up into xarray.concat, though.

Author: Thanks for the pointer. It was indeed the place where the loss of dtype happened.
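The reviewer's point about join modes can be seen even with plain integer coordinates; a minimal sketch with hypothetical data: join='outer' unions the labels and must invent fill values, while join='inner' keeps only the shared labels and needs none.

```python
import numpy as np
import xarray as xr

a = xr.Dataset({'v': ('x', [1.0, 2.0])}, coords={'x': [0, 1]})
b = xr.Dataset({'v': ('x', [3.0, 4.0])}, coords={'x': [1, 2]})

# Outer join: union of labels, gaps filled with NaN.
oa, ob = xr.align(a, b, join='outer')

# Inner join: only shared labels, no fill values introduced.
ia, ib = xr.align(a, b, join='inner')
```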

for ds in datasets:
for dim_name, dim_dtype in dtype_dict.items():
try:
ds[dim_name] = ds[dim_name].astype(dim_dtype)
except KeyError:
pass

concat_over = _calc_concat_over(datasets, dim, data_vars, coords)

def insert_result_variable(k, v):
85 changes: 72 additions & 13 deletions xarray/core/variable.py
@@ -1212,35 +1212,94 @@ def __setitem__(self, key, value):
raise TypeError('%s values cannot be modified' % type(self).__name__)

@classmethod
def concat_numpy(cls, variables, positions=None):
"""
Concatenates variables backed by numpy arrays. Works for variables
whose dtype is different from numpy.object. If a variable's dtype
is numpy.object this raises TypeError, and the "concat" function
falls back to concat_pandas.
:param variables: list of variables to concatenate
:return: concatenated data
"""
variables = list(variables)
variable_type_set = set(type(v.data) for v in variables)

if len(variable_type_set) > 1:
raise TypeError('Trying to concatenate variables of '
'different types')

Member: These extra checks would break some things that currently work with np.concatenate (e.g., concatenate([float, int]) -> float). So I would remove them, and replace this with the simpler np.concatenate([v.data for v in variables]).
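The promotion behavior the reviewer mentions is easy to check with a quick numpy sketch:

```python
import numpy as np

# np.concatenate promotes mixed numeric dtypes instead of raising,
# which the strict same-type check above would disallow.
out = np.concatenate([np.array([1.0, 2.0]), np.array([3, 4])])
print(out.dtype)  # float64
```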

variable_type = list(variable_type_set)[0]
if not variable_type == np.ndarray:
raise TypeError('Can only concatenate variables whose '
'_data member is ndarray')
Member: This is already guaranteed by xarray's data model, so this should either be an assert or you should skip it entirely.

Author: OK


if variables[0].dtype == np.object:
raise TypeError('concat_numpy only handles variables whose '
'dtype is different from numpy.object')

indexes = [v._data for v in variables]

if not indexes:
data = []
else:
data = np.concatenate(indexes)

Member: We don't need this special branch for numpy. Literally return np.concatenate([v.data for v in variables]) would do for this function.

Author: done

if positions is not None:
indices = nputils.inverse_permutation(
np.concatenate(positions))
data = data.take(indices)

return data

@classmethod
def concat_pandas(cls, variables, positions=None):
"""
Concatenates variables. This is a generic function that handles
all cases for which concat_numpy does not work.
:param variables: list of variables to concatenate
:return: concatenated data
"""

Member: if you keep these methods, they should be private (preface with _)

Author: done
indexes = [v._data.array for v in variables]

if not indexes:
data = []
else:
data = indexes[0].append(indexes[1:])

if positions is not None:
indices = nputils.inverse_permutation(
np.concatenate(positions))
data = data.take(indices)

return data

Member: This block is the same for both _concat_pandas and _concat_numpy -- please move it up into the caller instead.
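What nputils.inverse_permutation accomplishes in the positions branch can be sketched with plain numpy (the values here are hypothetical; the real helper lives in xarray.core.nputils):

```python
import numpy as np

# Data was concatenated group by group; positions records where each
# group's elements belong in the original order.
positions = [np.array([2, 0]), np.array([1])]
data = np.array(['c', 'a', 'b'])  # group order: items from index 2, 0, then 1

order = np.concatenate(positions)       # [2, 0, 1]
inverse = np.empty_like(order)
inverse[order] = np.arange(order.size)  # invert the permutation

print(data.take(inverse))  # back in original index order
```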

@classmethod
def concat(cls, variables, dim='concat_dim', positions=None,
shortcut=False):
"""Specialized version of Variable.concat for
IndexVariable objects.
This exists because we want to avoid converting Index objects to NumPy
arrays, if possible.
"""
if not isinstance(dim, basestring):
dim, = dim.dims

variables = list(variables)

first_var = variables[0]

if any(not isinstance(v, cls) for v in variables):
raise TypeError('IndexVariable.concat requires that all input '
'variables be IndexVariable objects')

# GH1434
# Fixes bug: "xr.concat loses coordinate dtype
# information with recarrays in 0.9"
try:
data = cls.concat_numpy(variables, positions)
except TypeError:
data = cls.concat_pandas(variables, positions)

Member: Rather than using exceptions for control flow, can we divide this into two cases based upon whether any (all?) of the variables have dtype=object?

Author: done

attrs = OrderedDict(first_var.attrs)
if not shortcut:
for var in variables:
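The dispatch the reviewer asks for, branching on dtype up front instead of catching TypeError, could look like this; a sketch only, and concat_dispatch is a hypothetical helper, not xarray API:

```python
import numpy as np

def concat_dispatch(arrays):
    """Pick a concat strategy by inspecting dtypes, not via exceptions."""
    if any(a.dtype == object for a in arrays):
        # Object-dtype indexes would go through the pandas-based path.
        return 'pandas'
    # Plain numeric or structured dtypes concatenate fine with numpy.
    return 'numpy'

print(concat_dispatch([np.arange(3), np.arange(3)]))        # numpy
print(concat_dispatch([np.array(['x', 1], dtype=object)]))  # pandas
```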
2 changes: 1 addition & 1 deletion xarray/tests/__init__.py
@@ -101,7 +101,7 @@
try:
_SKIP_FLAKY = not pytest.config.getoption("--run-flaky")
_SKIP_NETWORK_TESTS = not pytest.config.getoption("--run-network-tests")
except ValueError:
except (ValueError, AttributeError) as e:
Member: did AttributeError come up for you? Even if so, we don't need as e unless we're doing something with the error.

# Can't get config from pytest, e.g., because xarray is installed instead
# of being run from a development version (and hence conftests.py is not
# available). Don't run flaky tests.
Expand Down
45 changes: 45 additions & 0 deletions xarray/tests/test_combine.py
@@ -75,6 +75,51 @@ def rectify_dim_order(dataset):
expected['dim1'] = dim
self.assertDatasetIdentical(expected, concat(datasets, dim))

def test_concat_dtype_preservation(self):
"""
This test checks whether concatennation of two DataArrays
along the axis whose dimension is numpy structured array
preserves dtype of the numpy structured array
"""

p1 = np.array([('A', 180), ('B', 150), ('C', 200)],
dtype=[('name', '|S256'), ('height', int)])
p2 = np.array([('D', 170), ('E', 250), ('F', 150)],
dtype=[('name', '|S256'), ('height', int)])

data = np.arange(50, 80, 1, dtype=np.float)

dims = ['measurement', 'participant']

da1 = DataArray(
data.reshape(10, 3),
coords={
'measurement': np.arange(10),
'participant': p1,
},
dims=dims
)

da2 = DataArray(
data.reshape(10, 3),
coords={
'measurement': np.arange(10),
'participant': p2,
},
dims=dims
)

combined_1 = concat([da1, da2], dim='participant')

assert combined_1.participant.dtype == da1.participant.dtype
assert combined_1.measurement.dtype == da1.measurement.dtype

combined_2 = concat([da1, da2], dim='measurement')

assert combined_2.participant.dtype == da1.participant.dtype
assert combined_2.measurement.dtype == da1.measurement.dtype

def test_concat_data_vars(self):
data = Dataset({'foo': ('x', np.random.randn(10))})
objs = [data.isel(x=slice(5)), data.isel(x=slice(5, None))]