From 34d4852ff8e015dc15e5fe0f5db939d6c512ed1d Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 14 Jan 2015 13:22:15 -0800 Subject: [PATCH 1/3] Automatic alignment on indexes in binary arithmetic --- xray/core/alignment.py | 10 ++++++---- xray/core/dataarray.py | 8 ++++++++ xray/core/dataset.py | 7 +++++++ xray/test/test_dataarray.py | 26 +++++++++++++++++--------- xray/test/test_dataset.py | 15 +++++++++++++++ 5 files changed, 53 insertions(+), 13 deletions(-) diff --git a/xray/core/alignment.py b/xray/core/alignment.py index a1025b6d64d..d3bfb58fa5c 100644 --- a/xray/core/alignment.py +++ b/xray/core/alignment.py @@ -27,8 +27,7 @@ def align(*objects, **kwargs): objects with aligned indexes. Array from the aligned objects are suitable as input to mathematical - operators, because along each dimension they are indexed by the same - indexes. + operators, because along each dimension they have the same indexes. Missing values (if ``join != 'inner'``) are filled with NaN. @@ -44,8 +43,8 @@ def align(*objects, **kwargs): - 'left': use indexes from the first object with each dimension - 'right': use indexes from the last object with each dimension copy : bool, optional - If `copy=True`, the returned objects contain all new variables. If - `copy=False` and no reindexing is required then the aligned objects + If ``copy=True``, the returned objects contain all new variables. If + ``copy=False`` and no reindexing is required then the aligned objects will include original variables. Returns @@ -55,6 +54,9 @@ def align(*objects, **kwargs): """ join = kwargs.pop('join', 'inner') copy = kwargs.pop('copy', True) + if kwargs: + raise TypeError('align() got unexpected keyword arguments: %s' + % list(kwargs)) if join == 'outer': join_indices = functools.partial(functools.reduce, operator.or_) diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index 85ce7e2cd77..44259d038d4 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -9,6 +9,7 @@ from . 
import ops from . import utils from . import variable +from .alignment import align from .common import AbstractArray, AttrAccessMixin from .coordinates import DataArrayCoordinates, Indexes from .dataset import Dataset @@ -950,6 +951,13 @@ def _binary_op(f, reflexive=False): def func(self, other): if isinstance(other, (Dataset, groupby.GroupBy)): return NotImplemented + if hasattr(other, 'indexes'): + self, other = align(self, other, join='inner', copy=False) + empty_indexes = [d for d, s in zip(self.dims, self.shape) + if s == 0] + if empty_indexes: + raise ValueError('no overlapping labels for some ' + 'dimensions: %s' % empty_indexes) other_coords = getattr(other, 'coords', None) other_variable = getattr(other, 'variable', other) ds = self.coords.merge(other_coords) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index 9fb18c79f0a..d3361df00fa 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -17,6 +17,7 @@ from . import alignment from . import formatting from .. 
import backends, conventions +from .alignment import align from .coordinates import DatasetCoordinates, Indexes from .common import ImplementsDatasetReduce, AttrAccessMixin from .utils import Frozen, SortedKeysDict, ChainMap @@ -1651,6 +1652,12 @@ def _binary_op(f, reflexive=False): def func(self, other): if isinstance(other, groupby.GroupBy): return NotImplemented + if hasattr(other, 'indexes'): + self, other = align(self, other, join='inner', copy=False) + empty_indexes = [d for d, s in self.dims.items() if s == 0] + if empty_indexes: + raise ValueError('no overlapping labels for some ' + 'dimensions: %s' % empty_indexes) other_coords = getattr(other, 'coords', None) ds = self.coords.merge(other_coords) g = f if not reflexive else lambda x, y: f(y, x) diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 77571f22c38..1956805aaf3 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -530,13 +530,15 @@ def test_math(self): self.assertDataArrayEqual(a, 0 * x + a) self.assertDataArrayEqual(a, a + 0 * a) self.assertDataArrayEqual(a, 0 * a + a) - # test different indices - b = a.copy() - b.coords['x'] = 3 + np.arange(10) - with self.assertRaisesRegexp(ValueError, 'not aligned'): - a + b - with self.assertRaisesRegexp(ValueError, 'not aligned'): - b + a + + def test_math_automatic_alignment(self): + a = DataArray(range(5), [('x', range(5))]) + b = DataArray(range(5), [('x', range(1, 6))]) + expected = DataArray(np.ones(4), [('x', [1, 2, 3, 4])]) + self.assertDataArrayIdentical(a - b, expected) + + with self.assertRaisesRegexp(ValueError, 'no overlapping labels'): + a.isel(x=slice(2)) + a.isel(x=slice(2, None)) def test_inplace_math_basics(self): x = self.x @@ -550,6 +552,14 @@ def test_inplace_math_basics(self): self.assertIs(source_ndarray(b.values), x) self.assertDatasetIdentical(b._dataset, self.ds) + def test_inplace_math_automatic_alignment(self): + a = DataArray(range(5), [('x', range(5))]) + b = DataArray(range(1, 
6), [('x', range(1, 6))]) + with self.assertRaisesRegexp(ValueError, 'not aligned'): + a += b + with self.assertRaisesRegexp(ValueError, 'not aligned'): + b += a + def test_math_name(self): # Verify that name is preserved only when it can be done unambiguously. # The rule (copied from pandas.Series) is keep the current name only if @@ -902,8 +912,6 @@ def test_concat(self): def test_align(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) - with self.assertRaises(ValueError): - self.dv + self.dv[:5] dv1, dv2 = align(self.dv, self.dv[:5], join='inner') self.assertDataArrayIdentical(dv1, self.dv[:5]) self.assertDataArrayIdentical(dv2, self.dv[:5]) diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index fb4ec3fad51..b77495318c7 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -1481,6 +1481,21 @@ def test_dataset_dataset_math(self): self.assertDatasetIdentical(ds == ds, ds.notnull()) + subsampled = ds.isel(y=slice(2)) + expected = 2 * subsampled + self.assertDatasetIdentical(expected, subsampled + ds) + self.assertDatasetIdentical(expected, ds + subsampled) + + def test_dataset_math_automatic_alignment(self): + ds = self.make_example_math_dataset() + subset = ds.isel(x=slice(2), y=[1, 3]) + expected = 2 * subset + actual = ds + subset + self.assertDatasetIdentical(expected, actual) + + with self.assertRaisesRegexp(ValueError, 'no overlapping labels'): + ds.isel(x=slice(1)) + ds.isel(x=slice(1, None)) + def test_dataset_math_errors(self): ds = self.make_example_math_dataset() From a0c0c89a3147c8c94d2e93f22832c7c90a9cf177 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 13 Feb 2015 00:25:10 -0800 Subject: [PATCH 2/3] Auto-align Dataset __init__, __setitem__, merge, update --- doc/whats-new.rst | 8 ++ xray/core/alignment.py | 64 +++++++++++----- xray/core/dataset.py | 157 ++++++++++++++++++++++---------------- xray/test/test_dataset.py | 111 +++++++++++++++++++++++++-- 4 files changed, 250 insertions(+), 90 
deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d20bd79fc03..798bbef040e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -9,6 +9,14 @@ What's New import xray np.random.seed(123456) +v0.4 (unreleased) +----------------- + +.. These need tests: +.. ``Dataset.update`` no longer updates attributes as well as variables. +.. ``Dataset.__init__`` is not longer as strict by default +.. TODO: auto-align Dataset arithmetic + v0.3.2 (23 December, 2014) -------------------------- diff --git a/xray/core/alignment.py b/xray/core/alignment.py index d3bfb58fa5c..e25941c18e7 100644 --- a/xray/core/alignment.py +++ b/xray/core/alignment.py @@ -12,14 +12,39 @@ from .variable import as_variable, Variable, Coordinate, broadcast_variables -def _get_all_indexes(objects): +def _get_joiner(join): + if join == 'outer': + return functools.partial(functools.reduce, operator.or_) + elif join == 'inner': + return functools.partial(functools.reduce, operator.and_) + elif join == 'left': + return operator.itemgetter(0) + elif join == 'right': + return operator.itemgetter(-1) + else: + raise ValueError('invalid value for join: %s' % join) + + +def _get_all_indexes(objects, exclude=set()): all_indexes = defaultdict(list) for obj in objects: for k, v in iteritems(obj.indexes): - all_indexes[k].append(v) + if k not in exclude: + all_indexes[k].append(v) return all_indexes +def _join_indexes(join, objects, exclude=set()): + joiner = _get_joiner(join) + indexes = _get_all_indexes(objects, exclude=exclude) + # exclude dimensions with all equal indices (the usual case) to avoid + # unnecessary reindexing work. 
+ # TODO: don't bother to check equals for left or right joins + joined_indexes = dict((k, joiner(v)) for k, v in iteritems(indexes) + if any(not v[0].equals(idx) for idx in v[1:])) + return joined_indexes + + def align(*objects, **kwargs): """align(*objects, join='inner', copy=True) @@ -38,10 +63,10 @@ def align(*objects, **kwargs): join : {'outer', 'inner', 'left', 'right'}, optional Method for joining the indexes of the passed objects along each dimension: - - 'outer': use the union of object indexes - - 'inner': use the intersection of object indexes - - 'left': use indexes from the first object with each dimension - - 'right': use indexes from the last object with each dimension + - 'outer': use the union of object indexes + - 'inner': use the intersection of object indexes + - 'left': use indexes from the first object with each dimension + - 'right': use indexes from the last object with each dimension copy : bool, optional If ``copy=True``, the returned objects contain all new variables. If ``copy=False`` and no reindexing is required then the aligned objects @@ -58,22 +83,23 @@ def align(*objects, **kwargs): raise TypeError('align() got unexpected keyword arguments: %s' % list(kwargs)) - if join == 'outer': - join_indices = functools.partial(functools.reduce, operator.or_) - elif join == 'inner': - join_indices = functools.partial(functools.reduce, operator.and_) - elif join == 'left': - join_indices = operator.itemgetter(0) - elif join == 'right': - join_indices = operator.itemgetter(-1) + joined_indexes = _join_indexes(join, objects) + return tuple(obj.reindex(copy=copy, **joined_indexes) for obj in objects) - all_indexes = _get_all_indexes(objects) - # Exclude dimensions with all equal indices to avoid unnecessary reindexing - # work. 
- joined_indexes = dict((k, join_indices(v)) for k, v in iteritems(all_indexes) - if any(not v[0].equals(idx) for idx in v[1:])) +def partial_align(*objects, **kwargs): + """partial_align(*objects, join='inner', copy=True, exclude=set() + + Like align, but don't align along dimensions in exclude. Not public API. + """ + join = kwargs.pop('join', 'inner') + copy = kwargs.pop('copy', True) + exclude = kwargs.pop('exclude', set()) + if kwargs: + raise TypeError('align() got unexpected keyword arguments: %s' + % list(kwargs)) + joined_indexes = _join_indexes(join, objects, exclude=exclude) return tuple(obj.reindex(copy=copy, **joined_indexes) for obj in objects) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index d3361df00fa..c68e7b07409 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -17,7 +17,7 @@ from . import alignment from . import formatting from .. import backends, conventions -from .alignment import align +from .alignment import align, partial_align from .coordinates import DatasetCoordinates, Indexes from .common import ImplementsDatasetReduce, AttrAccessMixin from .utils import Frozen, SortedKeysDict, ChainMap @@ -172,7 +172,18 @@ def _as_dataset_variable(name, var): return var -def _expand_arrays(raw_variables, old_variables={}, compat='identical'): +def _align_variables(arrays, join='outer'): + """Align all DataArrays in the provided dict, leaving other values alone. + """ + alignable = [k for k, v in arrays.items() if hasattr(v, 'indexes')] + aligned = align(*[arrays[a] for a in alignable], + join=join, copy=False) + new_arrays = OrderedDict(arrays) + new_arrays.update(zip(alignable, aligned)) + return new_arrays + + +def _expand_variables(raw_variables, old_variables={}, compat='identical'): """Expand a dictionary of variables. 
Returns a dictionary of Variable objects suitable for inserting into a @@ -190,15 +201,16 @@ def _expand_arrays(raw_variables, old_variables={}, compat='identical'): variables = ChainMap(new_variables, old_variables) def add_variable(name, var): + var = _as_dataset_variable(name, var) if name not in variables: - variables[name] = _as_dataset_variable(name, var) + variables[name] = var + new_coord_names.update(variables[name].dims) else: if not getattr(variables[name], compat)(var): raise ValueError('conflicting value for variable %s:\n' 'first value: %r\nsecond value: %r' % (name, variables[name], var)) if compat == 'broadcast_equals': - new_dims = _as_dataset_variable(name, var).dims common_dims = OrderedDict(zip(variables[name].dims, variables[name].shape)) common_dims.update(zip(var.dims, var.shape)) @@ -210,8 +222,10 @@ def add_variable(name, var): new_coord_names.update(var.coords) for dim, coord in iteritems(var.coords): if dim != name: - add_variable(dim, coord) + add_variable(dim, coord.variable) + var = var.variable add_variable(name, var) + return new_variables, new_coord_names @@ -239,19 +253,38 @@ def _calculate_dims(variables): return dims -class _DatasetLike(object): - """A Dataset-like object that only contains a few private attributes +def _merge_expand(aligned_self, other, overwrite_vars, compat): + possible_conflicts = dict((k, v) for k, v in aligned_self._arrays.items() + if k not in overwrite_vars) + new_vars, new_coord_names = _expand_variables(other, possible_conflicts, compat) + replace_vars = aligned_self._arrays.copy() + replace_vars.update(new_vars) + return replace_vars, new_vars, new_coord_names - Like `as_dataset`, handles DataArrays, Datasets and dictionaries of - variables. The difference is that this method never creates a new Dataset - object, and hence is much more lightweight, avoiding any consistency - checks on the variables (that should be handled later). 
- """ - def __init__(self, obj): - obj = getattr(obj, '_dataset', obj) - self._arrays = getattr(obj, '_arrays', obj) - self._coord_names = getattr(obj, '_coord_names', set()) - self.attrs = getattr(obj, 'attrs', {}) + +def _merge_dataset(self, other, overwrite_vars, compat, join): + aligned_self, other = partial_align(self, other, join=join, copy=False) + + replace_vars, new_vars, new_coord_names = _merge_expand( + aligned_self, other._arrays, overwrite_vars, compat) + new_coord_names.update(other._coord_names) + + return replace_vars, new_vars, new_coord_names + + +def _merge_dict(self, other, overwrite_vars, compat, join): + other = _align_variables(other, join='outer') + + alignable = [k for k, v in other.items() if hasattr(v, 'indexes')] + aligned = partial_align(self, *[other[a] for a in alignable], + join=join, copy=False, exclude=overwrite_vars) + + aligned_self = aligned[0] + + other = OrderedDict(other) + other.update(zip(alignable, aligned[1:])) + + return _merge_expand(aligned_self, other, overwrite_vars, compat) def _assert_empty(args, msg='%s'): @@ -324,7 +357,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, AttrAccessMixin): _attrs = None _arrays = Frozen({}) - def __init__(self, variables=None, coords=None, attrs=None): + def __init__(self, variables=None, coords=None, attrs=None, + compat='broadcast_equals'): """To load data from a file or file-like object, use the `open_dataset` function. 
@@ -359,12 +393,12 @@ def __init__(self, variables=None, coords=None, attrs=None): if coords is None: coords = set() if variables or coords: - self._set_init_vars_and_dims(variables, coords) + self._set_init_vars_and_dims(variables, coords, compat) if attrs is not None: self.attrs = attrs - def _add_missing_coords(self): - """Add missing coordinates IN-PLACE to _arrays + def _add_missing_coords_inplace(self): + """Add missing coordinates to self._arrays """ for dim, size in iteritems(self.dims): if dim not in self._arrays: @@ -399,25 +433,23 @@ def _update_vars_and_coords(self, new_arrays, new_coord_names={}, # all checks are complete: it's safe to update self._arrays = arrays self._dims = dims - self._add_missing_coords() - self._coord_names.update(dims) + self._add_missing_coords_inplace() self._coord_names.update(new_coord_names) - def _set_init_vars_and_dims(self, vars, coords): + def _set_init_vars_and_dims(self, vars, coords, compat): """Set the initial value of Dataset arrays and dimensions """ _assert_empty([k for k in vars if k in coords], 'redundant variables and coordinates: %s') arrays = ChainMap(vars, coords) - new_arrays, new_coord_names = _expand_arrays(arrays) - _assert_empty([k for k in new_coord_names if k not in new_arrays], - 'no matching variables exist for some coordinates: %s') + aligned = _align_variables(arrays) + new_variables, new_coord_names = _expand_variables(aligned, + compat=compat) new_coord_names.update(coords) - self._update_vars_and_coords(new_arrays, new_coord_names, - needs_copy=False, - check_coord_names=False) + self._update_vars_and_coords(new_variables, new_coord_names, + needs_copy=False, check_coord_names=False) @classmethod def load_store(cls, store, decoder=None): @@ -682,7 +714,7 @@ def __setitem__(self, key, value): if utils.is_dict_like(key): raise NotImplementedError('cannot yet use a dictionary as a key ' 'to set Dataset values') - self.merge({key: value}, inplace=True, overwrite_vars=[key]) + self.update({key: 
value}) def __delitem__(self, key): """Remove a variable from this dataset. @@ -1079,8 +1111,7 @@ def rename(self, name_dict, inplace=False): return obj def update(self, other, inplace=True): - """Update this dataset's variables and attributes with those from - another dataset. + """Update this dataset's variables with those from another dataset. Parameters ---------- @@ -1101,14 +1132,11 @@ def update(self, other, inplace=True): If any dimensions would have inconsistent sizes in the updated dataset. """ - other = _DatasetLike(other) - obj = self.merge(other, inplace=inplace, - overwrite_vars=other._arrays) - obj.attrs.update(other.attrs) - return obj + return self.merge( + other, inplace=inplace, overwrite_vars=list(other), join='left') def merge(self, other, inplace=False, overwrite_vars=set(), - compat='broadcast_equals'): + compat='broadcast_equals', join='outer'): """Merge the arrays of two datasets into a single dataset. This method generally not allow for overriding data, with the exception @@ -1135,6 +1163,13 @@ def merge(self, other, inplace=False, overwrite_vars=set(), - 'equals': all values and dimensions must be the same. - 'identical': all values, dimensions and attributes must be the same. 
+ join : {'outer', 'inner', 'left', 'right'}, optional + Method for joining ``self`` and ``other`` along shared dimensions: + + - 'outer': use the union of the indexes + - 'inner': use the intersection of the indexes + - 'left': use indexes from ``self`` + - 'right': use indexes from ``other`` Returns ------- @@ -1149,34 +1184,26 @@ def merge(self, other, inplace=False, overwrite_vars=set(), if compat not in ['broadcast_equals', 'equals', 'identical']: raise ValueError("compat=%r invalid: must be 'broadcast_equals', " "'equals' or 'identical'" % compat) - other = _DatasetLike(other) - # determine variables to check for conflicts - if not overwrite_vars: - potential_conflicts = self._arrays - else: - if isinstance(overwrite_vars, basestring): - overwrite_vars = set([overwrite_vars]) - else: - overwrite_vars = set(overwrite_vars) - potential_conflicts = dict((k, v) for k, v in iteritems(self._arrays) - if k not in overwrite_vars) - - new_variables, new_coord_names = _expand_arrays( - other._arrays, potential_conflicts, compat) - new_coord_names |= other._coord_names - - _assert_empty([k for k in other._arrays - if k in potential_conflicts - and k not in new_coord_names - and k in self.coords], - 'variables with these names already exist as ' - 'coordinates: %s') - - # update variables + if isinstance(overwrite_vars, basestring): + overwrite_vars = [overwrite_vars] + overwrite_vars = set(overwrite_vars) + + merge = _merge_dataset if isinstance(other, Dataset) else _merge_dict + + replace_vars, new_vars, new_coord_names = merge( + self, other, overwrite_vars, compat=compat, join=join) + + newly_coords = new_coord_names & (set(self) - set(self.coords)) + no_longer_coords = set(self.coords) & (set(new_vars) - new_coord_names) + ambiguous_coords = (newly_coords | no_longer_coords) - overwrite_vars + if ambiguous_coords: + raise ValueError('cannot merge: the following variables are ' + 'coordinates on one dataset but not the other: %s' + % list(ambiguous_coords)) + obj = 
self if inplace else self.copy() - obj._update_vars_and_coords(new_variables, new_coord_names, - needs_copy=inplace) + obj._update_vars_and_coords(replace_vars, new_coord_names) return obj def _assert_all_in_dataset(self, names, virtual_okay=False): diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index b77495318c7..5b25f99b27f 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -141,6 +141,46 @@ def test_constructor_0d(self): actual = Dataset({'x': arg}) self.assertDatasetIdentical(expected, actual) + def test_constructor_auto_align(self): + a = DataArray([1, 2], [('x', [0, 1])]) + b = DataArray([3, 4], [('x', [1, 2])]) + + # verify align uses outer join + expected = Dataset({'a': ('x', [1, 2, np.nan]), + 'b': ('x', [np.nan, 3, 4])}) + actual = Dataset({'a': a, 'b': b}) + self.assertDatasetIdentical(expected, actual) + + # var with different dimensions + c = ('y', [3, 4]) + expected2 = expected.merge({'c': c}) + actual = Dataset({'a': a, 'b': b, 'c': c}) + self.assertDatasetIdentical(expected2, actual) + + # var that is only aligned against the aligned variables + d = ('x', [3, 2, 1]) + expected3 = expected.merge({'d': d}) + actual = Dataset({'a': a, 'b': b, 'd': d}) + self.assertDatasetIdentical(expected3, actual) + + e = ('x', [0, 0]) + with self.assertRaisesRegexp(ValueError, 'conflicting sizes'): + Dataset({'a': a, 'b': b, 'e': e}) + + def test_constructor_compat(self): + data = {'x': DataArray(0, coords={'y': 1}), 'y': ('z', [1, 1, 1])} + with self.assertRaisesRegexp(ValueError, 'conflicting value'): + Dataset(data, compat='equals') + expected = Dataset({'x': 0}, {'y': ('z', [1, 1, 1])}) + actual = Dataset(data) + self.assertDatasetIdentical(expected, actual) + actual = Dataset(data, compat='broadcast_equals') + self.assertDatasetIdentical(expected, actual) + + data = {'x': DataArray(0, coords={'y': 3}), 'y': ('z', [1, 1, 1])} + with self.assertRaisesRegexp(ValueError, 'conflicting value'): + Dataset(data) + def 
test_constructor_with_coords(self): with self.assertRaisesRegexp(ValueError, 'redundant variables and co'): Dataset({'a': ('x', [1])}, {'a': ('x', [1])}) @@ -731,6 +771,28 @@ def test_update(self): self.assertIsNot(actual, expected) self.assertDatasetIdentical(expected, actual) + other = Dataset(attrs={'new': 'attr'}) + actual = data.copy() + actual.update(other) + self.assertDatasetIdentical(expected, actual) + + def test_update_auto_align(self): + ds = Dataset({'x': ('t', [3, 4])}) + + expected = Dataset({'x': ('t', [3, 4]), 'y': ('t', [np.nan, 5])}) + actual = ds.copy() + other = {'y': ('t', [5]), 't': [1]} + with self.assertRaisesRegexp(ValueError, 'conflicting sizes'): + actual.update(other) + actual.update(Dataset(other)) + self.assertDatasetIdentical(expected, actual) + + actual = ds.copy() + other = Dataset({'y': ('t', [5]), 't': [100]}) + actual.update(other) + expected = Dataset({'x': ('t', [3, 4]), 'y': ('t', [np.nan] * 2)}) + self.assertDatasetIdentical(expected, actual) + def test_merge(self): data = create_test_data() ds1 = data[['var1']] @@ -749,13 +811,11 @@ def test_merge(self): actual = data.merge(data.reset_coords(drop=True)) self.assertDatasetIdentical(data, actual) - with self.assertRaises(ValueError): - ds1.merge(ds2.isel(dim1=slice(2))) with self.assertRaises(ValueError): ds1.merge(ds2.rename({'var3': 'var1'})) - with self.assertRaisesRegexp(ValueError, 'coordinates with these'): + with self.assertRaisesRegexp(ValueError, 'cannot merge'): data.reset_coords().merge(data) - with self.assertRaisesRegexp(ValueError, 'variables with these'): + with self.assertRaisesRegexp(ValueError, 'cannot merge'): data.merge(data.reset_coords()) def test_merge_broadcast_equals(self): @@ -795,6 +855,22 @@ def test_merge_compat(self): with self.assertRaisesRegexp(ValueError, 'compat=\S+ invalid'): ds1.merge(ds2, compat='foobar') + def test_merge_auto_align(self): + ds1 = Dataset({'a': ('x', [1, 2])}) + ds2 = Dataset({'b': ('x', [3, 4]), 'x': [1, 2]}) + expected = 
Dataset({'a': ('x', [1, 2, np.nan]), + 'b': ('x', [np.nan, 3, 4])}) + self.assertDatasetIdentical(expected, ds1.merge(ds2)) + self.assertDatasetIdentical(expected, ds2.merge(ds1)) + + expected = expected.isel(x=slice(2)) + self.assertDatasetIdentical(expected, ds1.merge(ds2, join='left')) + self.assertDatasetIdentical(expected, ds2.merge(ds1, join='right')) + + expected = expected.isel(x=slice(1, 2)) + self.assertDatasetIdentical(expected, ds1.merge(ds2, join='inner')) + self.assertDatasetIdentical(expected, ds2.merge(ds1, join='inner')) + def test_getitem(self): data = create_test_data() self.assertIsInstance(data['var1'], DataArray) @@ -869,7 +945,7 @@ def test_setitem(self): data2['scalar'] = ([], 0) self.assertDatasetIdentical(data1, data2) # can't use the same dimension name as a scalar var - with self.assertRaisesRegexp(ValueError, 'already exists as a scalar'): + with self.assertRaisesRegexp(ValueError, 'cannot merge'): data1['newvar'] = ('scalar', [3, 4, 5]) # can't resize a used dimension with self.assertRaisesRegexp(ValueError, 'conflicting sizes'): @@ -881,6 +957,29 @@ def test_setitem(self): with self.assertRaises(NotImplementedError): data1[{'x': 0}] = 0 + def test_setitem_auto_align(self): + ds = Dataset() + ds['x'] = ('y', range(3)) + ds['y'] = 1 + np.arange(3) + expected = Dataset({'x': ('y', range(3)), 'y': 1 + np.arange(3)}) + self.assertDatasetIdentical(ds, expected) + + ds['y'] = DataArray(range(3), dims='y') + expected = Dataset({'x': ('y', range(3))}) + self.assertDatasetIdentical(ds, expected) + + ds['x'] = DataArray([1, 2], dims='y') + expected = Dataset({'x': ('y', [1, 2, np.nan])}) + self.assertDatasetIdentical(ds, expected) + + ds['x'] = 42 + expected = Dataset({'x': 42, 'y': range(3)}) + self.assertDatasetIdentical(ds, expected) + + ds['x'] = DataArray([4, 5, 6, 7], dims='y') + expected = Dataset({'x': ('y', [4, 5, 6])}) + self.assertDatasetIdentical(ds, expected) + def test_delitem(self): data = create_test_data() all_items = set(data) 
@@ -1486,7 +1585,7 @@ def test_dataset_dataset_math(self): self.assertDatasetIdentical(expected, subsampled + ds) self.assertDatasetIdentical(expected, ds + subsampled) - def test_dataset_math_automatic_alignment(self): + def test_dataset_math_auto_align(self): ds = self.make_example_math_dataset() subset = ds.isel(x=slice(2), y=[1, 3]) expected = 2 * subset From 6ccf629511dce9ef3fb6074fe2133d3277b506da Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 13 Feb 2015 00:54:27 -0800 Subject: [PATCH 3/3] Auto-alignment of variables for Dataset binary ops --- doc/whats-new.rst | 11 +++++++---- xray/core/dataset.py | 21 +++++++++++++++------ xray/test/test_dataset.py | 17 +++++++++++++++-- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 798bbef040e..ebff3eba79d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -12,10 +12,13 @@ What's New v0.4 (unreleased) ----------------- -.. These need tests: -.. ``Dataset.update`` no longer updates attributes as well as variables. -.. ``Dataset.__init__`` is not longer as strict by default -.. TODO: auto-align Dataset arithmetic +Highlights +~~~~~~~~~~ + +- Automatic alignment of index labels in arithmetic, dataset construction and + merging. +- Aggregation operations skip missing values by default. +- Lots of bug fixes. 
v0.3.2 (23 December, 2014) -------------------------- diff --git a/xray/core/dataset.py b/xray/core/dataset.py index c68e7b07409..d9ac8c0e48b 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -215,6 +215,7 @@ def add_variable(name, var): variables[name].shape)) common_dims.update(zip(var.dims, var.shape)) variables[name] = variables[name].set_dims(common_dims) + new_coord_names.update(var.dims) for name, var in iteritems(raw_variables): if hasattr(var, 'coords'): @@ -1718,15 +1719,23 @@ def _calculate_binary_op(f, dataset, other, dest_vars): if utils.is_dict_like(other): other_arrays = getattr(other, '_arrays', other) other_vars = getattr(other, 'vars', other) - if set(dataset_vars) != set(other_vars): - raise ValueError('Datasets do not have the same variables: ' - '%s, %s' % (list(dataset_vars), list(other_vars))) + performed_op = False for k in dataset_vars: - dest_vars[k] = f(dataset_arrays[k], other_arrays[k]) + if k in other_vars: + dest_vars[k] = f(dataset_arrays[k], other_arrays[k]) + performed_op = True + elif k in dest_vars: + # we are doing an in-place operation + raise ValueError('datasets must have the same variables for ' + 'in-place arithmetic operations: %s, %s' + % (list(dataset_vars), list(other_vars))) + if not performed_op: + raise ValueError('datasets have no overlapping variables: %s, %s' + % (list(dataset_vars), list(other_vars))) else: - other_arrays = getattr(other, 'variable', other) + other_variable = getattr(other, 'variable', other) for k in dataset_vars: - dest_vars[k] = f(dataset_arrays[k], other_arrays) + dest_vars[k] = f(dataset_arrays[k], other_variable) ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index 5b25f99b27f..4e918460e72 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -1595,6 +1595,19 @@ def test_dataset_math_auto_align(self): with self.assertRaisesRegexp(ValueError, 'no overlapping labels'): 
ds.isel(x=slice(1)) + ds.isel(x=slice(1, None)) + actual = ds + ds[['bar']] + expected = (2 * ds[['bar']]).merge(ds.coords) + self.assertDatasetIdentical(expected, actual) + + with self.assertRaisesRegexp(ValueError, 'no overlapping variables'): + ds + Dataset() + + with self.assertRaisesRegexp(ValueError, 'no overlapping variables'): + Dataset() + Dataset() + + # maybe unary arithmetic with empty datasets should raise instead? + self.assertDatasetIdentical(Dataset() + 1, Dataset()) + def test_dataset_math_errors(self): ds = self.make_example_math_dataset() @@ -1602,8 +1615,8 @@ def test_dataset_math_errors(self): ds['foo'] += ds with self.assertRaises(TypeError): ds['foo'].variable += ds - with self.assertRaisesRegexp(ValueError, 'do not have the same'): - ds + ds[['bar']] + with self.assertRaisesRegexp(ValueError, 'must have the same'): + ds += ds[['bar']] # verify we can rollback in-place operations if something goes wrong # nb. inplace datetime64 math actually will work with an integer array