From 88ee12a453af5dce948d2850b215b9d3c00e1cd0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 5 Nov 2018 20:09:13 +0000 Subject: [PATCH 01/37] concatenates along a single dimension --- xarray/core/combine.py | 51 ++++++++++++++++++++++++++++++++++++ xarray/tests/test_combine.py | 40 ++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 6853939c02d..ded6430f4aa 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function import warnings +import toolz.itertoolz as itertoolz import pandas as pd @@ -369,6 +370,56 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'): _CONCAT_DIM_DEFAULT = '__infer_concat_dim__' +def _concat_nd(combined_IDs, concat_dims): + """ + Recursively concatenates an N-dimensional structure of datasets. + + No checks are performed on the consistency of the datasets, concat_dims or tile_IDs, + because it is assumed that this has already been done. + + Parameters + ---------- + combined_IDs : Dict[Tuple[int, ...]], xarray.Dataset] + Structure containing all datasets to be concatenated with "tile_IDs" as keys, which + specify position within the desired final concatenated result. + concat_dims : sequence of str + + Returns + ------- + + """ + + for dim in concat_dims: + combined_IDs = _concat_all_along_last_dim(combined_IDs, dim) + + return combined_IDs.item + + +def _concat_all_along_last_dim(combined_IDs, dim): + + grouped = itertoolz.groupby(_rest_of_tile_id, combined_IDs.items()) + + new_combined_IDs = {} + for new_ID, group in grouped.items(): + print(new_ID) + print(group) + to_concat = [ds for old_ID, ds in group] + print(to_concat) + + new_combined_IDs[new_ID] = concat(to_concat, dim) + + return new_combined_IDs + + +def _rest_of_tile_id(single_id_ds_pair): + + # probably replace with something like lambda x: x[0][1:] + + tile_id, ds = single_id_ds_pair + tile_id_except_first_element = tile_id[1:] + return tile_id_except_first_element + + def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT, compat='no_conflicts', diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 2004b1e660f..7edd00dced5 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function from copy import deepcopy +from itertools import product import numpy as np import pandas as pd @@ -8,6 +9,7 @@ from xarray import DataArray, Dataset, Variable, auto_combine, concat from xarray.core.pycompat import OrderedDict, iteritems +from xarray.core.combine import _rest_of_tile_id, _concat_all_along_last_dim from . 
import ( InaccessibleArray, assert_array_equal, assert_equal, assert_identical, @@ -396,3 +398,41 @@ def test_auto_combine_no_concat(self): 'y': (('baz', 'z'), [[1, 2]])}, {'baz': [100]}) assert_identical(expected, actual) + + +@pytest.fixture(scope='module') +def create_combined_ids(): + return _create_combined_ids + + +def _create_combined_ids(shape): + tile_ids = _create_tile_ids(shape) + return {tile_id: create_test_data(0) for tile_id in tile_ids} + + +def _create_tile_ids(shape): + tile_ids = product(*(range(i) for i in shape)) + return list(tile_ids) + + +class TestConcatND(object): + def test_get_tile_ids(self, create_combined_ids): + shape = (1, 2, 3) + combined_ids = _create_combined_ids(shape) + print(combined_ids.keys()) + + for combined, tile_id in zip(combined_ids.items(), _create_tile_ids(shape)): + expected_new_tile_id = tile_id[1:] + assert _rest_of_tile_id(combined) == expected_new_tile_id + + def test_concat_once(self, create_combined_ids): + shape = (2,) + combined_ids = _create_combined_ids(shape) + print(combined_ids) + + result = _concat_all_along_last_dim(combined_ids, 'dim1') + print('-------------------') + print(result[()]) + expected_ds = concat([create_test_data(0), create_test_data(0)], 'dim1') + print(expected_ds) + assert_equal(result[()], expected_ds) From 1aaa0756f6b4d0e865744ab94bf39b55ec5aab02 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Nov 2018 12:42:34 +0000 Subject: [PATCH 02/37] Wrote function to find correct tile_IDs from nested list of datasets --- xarray/core/combine.py | 81 +++++++++++++++++++++++++++--------- xarray/testing.py | 8 ++++ xarray/tests/__init__.py | 2 +- xarray/tests/test_combine.py | 64 ++++++++++++++++++++++++++-- 4 files changed, 130 insertions(+), 25 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index ded6430f4aa..d6e2aef6456 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -367,21 +367,69 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'): return concat(datasets, dim=dim, data_vars=data_vars, coords=coords) -_CONCAT_DIM_DEFAULT = '__infer_concat_dim__' +def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids): + """ + Given a list of lists (of lists...) of datasets, returns a dictionary + with the index of each dataset in the nested list structure as the key. + + Recursively traverses the given structure, while keeping track of the current + position. + + Parameters + ---------- + entry : list[list[xarray.Dataset, xarray.Dataset, ...]] + List of lists of arbitrary depth, containing datasets in the order they + are to be concatenated. + + Returns + ------- + combined_tile_ids : dict[tuple(int, ...), xarray.Dataset] + """ + + from .dataset import Dataset + + if isinstance(entry, list): + # Check if list is redundant + if len(entry) == 1: + raise TypeError('Redundant list nesting at ' + 'position ' + str(current_pos)) + + # Dive down tree + current_pos.append(0) + for i, item in enumerate(entry): + current_pos[-1] = i + combined_tile_ids = _infer_tile_ids_from_nested_list(item, current_pos, + combined_tile_ids) + # Move back up tree + del current_pos[-1] + return combined_tile_ids + + elif isinstance(entry, Dataset): + # Termination condition + combined_tile_ids[tuple(current_pos)] = entry + return combined_tile_ids + + else: + raise TypeError("Element at position " + str(current_pos) + + " is neither a list nor an xarray.Dataset") + + +def _check_shape_tile_ids(combined_tile_ids): + ... 
def _concat_nd(combined_IDs, concat_dims): """ Recursively concatenates an N-dimensional structure of datasets. - No checks are performed on the consistency of the datasets, concat_dims or tile_IDs, - because it is assumed that this has already been done. + No checks are performed on the consistency of the datasets, concat_dims or + tile_IDs, because it is assumed that this has already been done. Parameters ---------- combined_IDs : Dict[Tuple[int, ...]], xarray.Dataset] - Structure containing all datasets to be concatenated with "tile_IDs" as keys, which - specify position within the desired final concatenated result. + Structure containing all datasets to be concatenated with "tile_IDs" as + keys, which specify position within the desired final concatenated result. concat_dims : sequence of str Returns @@ -390,34 +438,27 @@ def _concat_nd(combined_IDs, concat_dims): """ for dim in concat_dims: - combined_IDs = _concat_all_along_last_dim(combined_IDs, dim) + combined_IDs = _concat_all_along_first_dim(combined_IDs, dim) return combined_IDs.item -def _concat_all_along_last_dim(combined_IDs, dim): - - grouped = itertoolz.groupby(_rest_of_tile_id, combined_IDs.items()) - +def _concat_all_along_first_dim(combined_IDs, dim): + grouped = itertoolz.groupby(_tile_id_except_first_element, combined_IDs.items()) new_combined_IDs = {} for new_ID, group in grouped.items(): - print(new_ID) - print(group) to_concat = [ds for old_ID, ds in group] - print(to_concat) - new_combined_IDs[new_ID] = concat(to_concat, dim) - return new_combined_IDs -def _rest_of_tile_id(single_id_ds_pair): - +def _tile_id_except_first_element(single_id_ds_pair): # probably replace with something like lambda x: x[0][1:] - tile_id, ds = single_id_ds_pair - tile_id_except_first_element = tile_id[1:] - return tile_id_except_first_element + return tile_id[1:] + + +_CONCAT_DIM_DEFAULT = '__infer_concat_dim__' def auto_combine(datasets, diff --git a/xarray/testing.py b/xarray/testing.py index ee5a54cd7dc..03c5354cdff 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -138,3 +138,11 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True): else: raise TypeError('{} not supported by assertion comparison' .format(type(a))) + + +def assert_combined_tile_ids_equal(dict1, dict2): + assert len(dict1) == len(dict2) + for k, v in dict1.items(): + assert k in dict2.keys() + assert_equal(dict1[k], dict2[k]) + diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index a45f71bbc3b..cd66ad82356 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -15,7 +15,7 @@ from xarray.core import utils from xarray.core.indexing import ExplicitlyIndexed from xarray.testing import (assert_equal, assert_identical, # noqa: F401 - assert_allclose) + assert_allclose, assert_combined_tile_ids_equal) from xarray.plot.utils import import_seaborn try: diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 7edd00dced5..3ec602cb6f9 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -9,11 +9,13 @@ from xarray import DataArray, Dataset, Variable, auto_combine, concat from xarray.core.pycompat import OrderedDict, iteritems -from xarray.core.combine import _rest_of_tile_id, _concat_all_along_last_dim +from xarray.core.combine import ( + _tile_id_except_first_element, _concat_all_along_first_dim, + _infer_tile_ids_from_nested_list) from . 
import ( InaccessibleArray, assert_array_equal, assert_equal, assert_identical, - raises_regex, requires_dask) + assert_combined_tile_ids_equal, raises_regex, requires_dask) from .test_dataset import create_test_data @@ -400,6 +402,55 @@ def test_auto_combine_no_concat(self): assert_identical(expected, actual) +class TestTileIDsFromNestedList(object): + # TODO test ordering is correct by seeding datasets differently + def test_1d(self): + ds = create_test_data(0) + input = [ds, ds] + + expected = {(0,): ds, (1,): ds} + actual = _infer_tile_ids_from_nested_list(input, [], {}) + assert_combined_tile_ids_equal(expected, actual) + + def test_2d(self): + ds = create_test_data(0) + input = [[ds, ds], [ds, ds], [ds, ds]] + + expected = {(0, 0): ds, (0, 1): ds, + (1, 0): ds, (1, 1): ds, + (2, 0): ds, (2, 1): ds} + actual = _infer_tile_ids_from_nested_list(input, [], {}) + assert_combined_tile_ids_equal(expected, actual) + + def test_3d(self): + ds = create_test_data(0) + input = [[[ds, ds], [ds, ds], [ds, ds]], + [[ds, ds], [ds, ds], [ds, ds]]] + + expected = {(0, 0, 0): ds, (0, 0, 1): ds, + (0, 1, 0): ds, (0, 1, 1): ds, + (0, 2, 0): ds, (0, 2, 1): ds, + (1, 0, 0): ds, (1, 0, 1): ds, + (1, 1, 0): ds, (1, 1, 1): ds, + (1, 2, 0): ds, (1, 2, 1): ds} + actual = _infer_tile_ids_from_nested_list(input, [], {}) + assert_combined_tile_ids_equal(expected, actual) + + def test_redundant_nesting_gotcha(self): + ds = create_test_data(0) + input = [[ds], [ds]] + + expected = {(0,): ds, (1,): ds} + with pytest.raises(TypeError): + actual = _infer_tile_ids_from_nested_list(input, [], {}) + + def test_bad_element(self): + ds = create_test_data() + input = [ds, 'bad_element'] + with pytest.raises(TypeError): + _infer_tile_ids_from_nested_list(input, [], {}) + + @pytest.fixture(scope='module') def create_combined_ids(): return _create_combined_ids @@ -423,16 +474,21 @@ def test_get_tile_ids(self, create_combined_ids): for combined, tile_id in zip(combined_ids.items(), _create_tile_ids(shape)): expected_new_tile_id = tile_id[1:] - assert _rest_of_tile_id(combined) == expected_new_tile_id + assert _tile_id_except_first_element(combined) == expected_new_tile_id def test_concat_once(self, create_combined_ids): shape = (2,) combined_ids = _create_combined_ids(shape) print(combined_ids) - result = _concat_all_along_last_dim(combined_ids, 'dim1') + result = _concat_all_along_first_dim(combined_ids, 'dim1') print('-------------------') print(result[()]) expected_ds = concat([create_test_data(0), create_test_data(0)], 'dim1') print(expected_ds) assert_equal(result[()], expected_ds) + + @pytest.mark.skip + def test_concat_twice(self, create_combined_ids): + shape = (2, 3) + combined_ids = _create_combined_ids(shape) \ No newline at end of file From dbb371d7209011180d4621b77e93e8b1d70452da Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Nov 2018 11:41:42 +0000 Subject: [PATCH 03/37] Wrote function to check that combined_tile_ids structure is valid --- xarray/core/combine.py | 23 ++++++++++++++--- xarray/tests/test_combine.py | 48 +++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index d6e2aef6456..37cdc3e7f12 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -392,7 +392,7 @@ def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids): # Check if list is redundant if len(entry) == 1: raise TypeError('Redundant list nesting at ' - 'position ' + str(current_pos)) + 'position ' + 
str(tuple(current_pos))) # Dive down tree current_pos.append(0) @@ -410,12 +410,29 @@ def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids): return combined_tile_ids else: - raise TypeError("Element at position " + str(current_pos) + + raise TypeError("Element at position " + str(tuple(current_pos)) + " is neither a list nor an xarray.Dataset") def _check_shape_tile_ids(combined_tile_ids): - ... + # TODO create custom exception class instead of using asserts? + + tile_ids = combined_tile_ids.keys() + + # Check all tuples are the same length + lengths = [len(id) for id in tile_ids] + assert set(lengths) == {lengths[0]} + + # Check each dimension has indices 0 to n represented with no gaps + for dim in range(lengths[0]): + indices = [id[dim] for id in tile_ids] + assert len(indices) > 1 + assert sorted(indices) == range(max(indices)) + + # Check only datasets are contained + from .dataset import Dataset + for v in combined_tile_ids.values(): + assert isinstance(v, Dataset) def _concat_nd(combined_IDs, concat_dims): diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 3ec602cb6f9..91742864436 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -11,7 +11,7 @@ from xarray.core.pycompat import OrderedDict, iteritems from xarray.core.combine import ( _tile_id_except_first_element, _concat_all_along_first_dim, - _infer_tile_ids_from_nested_list) + _infer_tile_ids_from_nested_list, _check_shape_tile_ids) from . import ( InaccessibleArray, assert_array_equal, assert_equal, assert_identical, @@ -402,6 +402,14 @@ def test_auto_combine_no_concat(self): assert_identical(expected, actual) +# TODO get this fixture to work!! +@pytest.fixture(scope='module') +def ds(): + def _create_test_data(s=0): + return create_test_data(seed=s) + return _create_test_data + + class TestTileIDsFromNestedList(object): # TODO test ordering is correct by seeding datasets differently def test_1d(self): @@ -440,9 +448,8 @@ def test_redundant_nesting_gotcha(self): ds = create_test_data(0) input = [[ds], [ds]] - expected = {(0,): ds, (1,): ds} with pytest.raises(TypeError): - actual = _infer_tile_ids_from_nested_list(input, [], {}) + _infer_tile_ids_from_nested_list(input, [], {}) def test_bad_element(self): ds = create_test_data() @@ -450,6 +457,14 @@ def test_bad_element(self): with pytest.raises(TypeError): _infer_tile_ids_from_nested_list(input, [], {}) + def test_ragged_input(self): + ds = create_test_data(0) + input = [ds, [ds, ds]] + + expected = {(0,): ds, (1, 0): ds, (1, 1): ds} + actual = _infer_tile_ids_from_nested_list(input, [], {}) + assert_combined_tile_ids_equal(expected, actual) + @pytest.fixture(scope='module') def create_combined_ids(): @@ -491,4 +506,29 @@ def test_concat_once(self, create_combined_ids): @pytest.mark.skip def test_concat_twice(self, create_combined_ids): shape = (2, 3) - combined_ids = _create_combined_ids(shape) \ No newline at end of file + combined_ids = _create_combined_ids(shape) + + +class TestCheckShapeTileIDs(object): + def test_check_lengths(self): + ds = create_test_data(0) + combined_tile_ids = {(0,): ds, (0, 1): ds} + with pytest.raises(AssertionError): + _check_shape_tile_ids(combined_tile_ids) + + def test_check_non_zero_length_along_all_dims(self): + ds = create_test_data(0) + combined_tile_ids = {(0, 0): ds, (1, 0): ds} + with pytest.raises(AssertionError): + _check_shape_tile_ids(combined_tile_ids) + + def test_check_linearity(self): + ds = create_test_data(0) + combined_tile_ids = {(0,): ds, (2,): 
ds} + with pytest.raises(AssertionError): + _check_shape_tile_ids(combined_tile_ids) + + def test_check_contains_datasets(self): + combined_tile_ids = {(0,): 'a', (1,): 'b'} + with pytest.raises(AssertionError): + _check_shape_tile_ids(combined_tile_ids) From cc4d7438320d67931d5e5f4d655922d3f129ee2c Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Nov 2018 16:20:07 +0000 Subject: [PATCH 04/37] Added test of 2d-concatenation --- xarray/core/combine.py | 10 +++++++--- xarray/tests/test_combine.py | 26 ++++++++++++++------------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 37cdc3e7f12..4fa98a70858 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -457,19 +457,23 @@ def _concat_nd(combined_IDs, concat_dims): for dim in concat_dims: combined_IDs = _concat_all_along_first_dim(combined_IDs, dim) - return combined_IDs.item + combined_ds = combined_IDs[()] + + return combined_ds def _concat_all_along_first_dim(combined_IDs, dim): - grouped = itertoolz.groupby(_tile_id_except_first_element, combined_IDs.items()) + grouped = itertoolz.groupby(_new_tile_id, combined_IDs.items()) new_combined_IDs = {} + + # TODO Would there be any point in parallelizing this concatenation step? for new_ID, group in grouped.items(): to_concat = [ds for old_ID, ds in group] new_combined_IDs[new_ID] = concat(to_concat, dim) return new_combined_IDs -def _tile_id_except_first_element(single_id_ds_pair): +def _new_tile_id(single_id_ds_pair): # probably replace with something like lambda x: x[0][1:] tile_id, ds = single_id_ds_pair return tile_id[1:] diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 91742864436..392df28ddd2 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -10,8 +10,8 @@ from xarray import DataArray, Dataset, Variable, auto_combine, concat from xarray.core.pycompat import OrderedDict, iteritems from xarray.core.combine import ( - _tile_id_except_first_element, _concat_all_along_first_dim, - _infer_tile_ids_from_nested_list, _check_shape_tile_ids) + _new_tile_id, _concat_all_along_first_dim, + _infer_tile_ids_from_nested_list, _check_shape_tile_ids, _concat_nd) from . 
import ( InaccessibleArray, assert_array_equal, assert_equal, assert_identical, @@ -485,28 +485,30 @@ class TestConcatND(object): def test_get_tile_ids(self, create_combined_ids): shape = (1, 2, 3) combined_ids = _create_combined_ids(shape) - print(combined_ids.keys()) for combined, tile_id in zip(combined_ids.items(), _create_tile_ids(shape)): expected_new_tile_id = tile_id[1:] - assert _tile_id_except_first_element(combined) == expected_new_tile_id + assert _new_tile_id(combined) == expected_new_tile_id def test_concat_once(self, create_combined_ids): shape = (2,) combined_ids = _create_combined_ids(shape) - print(combined_ids) - + ds = create_test_data(0) result = _concat_all_along_first_dim(combined_ids, 'dim1') - print('-------------------') - print(result[()]) - expected_ds = concat([create_test_data(0), create_test_data(0)], 'dim1') - print(expected_ds) - assert_equal(result[()], expected_ds) - @pytest.mark.skip + expected_ds = concat([ds, ds], 'dim1') + assert_combined_tile_ids_equal(result, {(): expected_ds}) + def test_concat_twice(self, create_combined_ids): shape = (2, 3) combined_ids = _create_combined_ids(shape) + result = _concat_nd(combined_ids, concat_dims=['dim1', 'dim2']) + + ds = create_test_data(0) + partway = concat([ds, ds], dim='dim1') + expected = concat([partway, partway, partway], dim='dim2') + + assert_equal(result, expected) class TestCheckShapeTileIDs(object): From d2fc7e723fe4b99032f9ac4445fb3fc4404b4477 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 8 Nov 2018 08:53:11 +0000 Subject: [PATCH 05/37] Tests now check that dataset ordering is correct --- xarray/tests/test_combine.py | 68 ++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 392df28ddd2..f7372165eb2 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -402,45 +402,36 @@ def test_auto_combine_no_concat(self): assert_identical(expected, actual) -# TODO get this fixture to work!! 
-@pytest.fixture(scope='module') -def ds(): - def _create_test_data(s=0): - return create_test_data(seed=s) - return _create_test_data - - class TestTileIDsFromNestedList(object): - # TODO test ordering is correct by seeding datasets differently def test_1d(self): - ds = create_test_data(0) - input = [ds, ds] + ds = create_test_data + input = [ds(0), ds(1)] - expected = {(0,): ds, (1,): ds} + expected = {(0,): ds(0), (1,): ds(1)} actual = _infer_tile_ids_from_nested_list(input, [], {}) assert_combined_tile_ids_equal(expected, actual) def test_2d(self): - ds = create_test_data(0) - input = [[ds, ds], [ds, ds], [ds, ds]] + ds = create_test_data + input = [[ds(0), ds(1)], [ds(2), ds(3)], [ds(4), ds(5)]] - expected = {(0, 0): ds, (0, 1): ds, - (1, 0): ds, (1, 1): ds, - (2, 0): ds, (2, 1): ds} + expected = {(0, 0): ds(0), (0, 1): ds(1), + (1, 0): ds(2), (1, 1): ds(3), + (2, 0): ds(4), (2, 1): ds(5)} actual = _infer_tile_ids_from_nested_list(input, [], {}) assert_combined_tile_ids_equal(expected, actual) def test_3d(self): - ds = create_test_data(0) - input = [[[ds, ds], [ds, ds], [ds, ds]], - [[ds, ds], [ds, ds], [ds, ds]]] - - expected = {(0, 0, 0): ds, (0, 0, 1): ds, - (0, 1, 0): ds, (0, 1, 1): ds, - (0, 2, 0): ds, (0, 2, 1): ds, - (1, 0, 0): ds, (1, 0, 1): ds, - (1, 1, 0): ds, (1, 1, 1): ds, - (1, 2, 0): ds, (1, 2, 1): ds} + ds = create_test_data + input = [[[ds(0), ds(1)], [ds(2), ds(3)], [ds(4), ds(5)]], + [[ds(6), ds(7)], [ds(8), ds(9)], [ds(10), ds(11)]]] + + expected = {(0, 0, 0): ds(0), (0, 0, 1): ds(1), + (0, 1, 0): ds(2), (0, 1, 1): ds(3), + (0, 2, 0): ds(4), (0, 2, 1): ds(5), + (1, 0, 0): ds(6), (1, 0, 1): ds(7), + (1, 1, 0): ds(8), (1, 1, 1): ds(9), + (1, 2, 0): ds(10), (1, 2, 1): ds(11)} actual = _infer_tile_ids_from_nested_list(input, [], {}) assert_combined_tile_ids_equal(expected, actual) @@ -452,16 +443,16 @@ def test_redundant_nesting_gotcha(self): _infer_tile_ids_from_nested_list(input, [], {}) def test_bad_element(self): - ds = create_test_data() + ds = create_test_data(0) input = [ds, 'bad_element'] with pytest.raises(TypeError): _infer_tile_ids_from_nested_list(input, [], {}) def test_ragged_input(self): - ds = create_test_data(0) - input = [ds, [ds, ds]] + ds = create_test_data + input = [ds(0), [ds(1), ds(2)]] - expected = {(0,): ds, (1, 0): ds, (1, 1): ds} + expected = {(0,): ds(0), (1, 0): ds(1), (1, 1): ds(2)} actual = _infer_tile_ids_from_nested_list(input, [], {}) assert_combined_tile_ids_equal(expected, actual) @@ -473,7 +464,8 @@ def create_combined_ids(): def _create_combined_ids(shape): tile_ids = _create_tile_ids(shape) - return {tile_id: create_test_data(0) for tile_id in tile_ids} + nums = range(len(tile_ids)) + return {tile_id: create_test_data(num) for tile_id, num in zip(tile_ids, nums)} def _create_tile_ids(shape): @@ -493,10 +485,10 @@ def test_get_tile_ids(self, create_combined_ids): def test_concat_once(self, create_combined_ids): shape = (2,) combined_ids = _create_combined_ids(shape) - ds = create_test_data(0) + ds = create_test_data result = _concat_all_along_first_dim(combined_ids, 'dim1') - expected_ds = concat([ds, ds], 'dim1') + expected_ds = concat([ds(0), ds(1)], 'dim1') assert_combined_tile_ids_equal(result, {(): expected_ds}) def test_concat_twice(self, create_combined_ids): @@ -504,9 +496,11 @@ def test_concat_twice(self, create_combined_ids): combined_ids = _create_combined_ids(shape) result = _concat_nd(combined_ids, concat_dims=['dim1', 'dim2']) - ds = create_test_data(0) - partway = concat([ds, ds], dim='dim1') - expected = 
concat([partway, partway, partway], dim='dim2')
+        ds = create_test_data
+        partway1 = concat([ds(0), ds(3)], dim='dim1')
+        partway2 = concat([ds(1), ds(4)], dim='dim1')
+        partway3 = concat([ds(2), ds(5)], dim='dim1')
+        expected = concat([partway1, partway2, partway3], dim='dim2')

         assert_equal(result, expected)

From e3f3699a76d2ed2f88605ddd0cc2b46d6f89baa4 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Thu, 8 Nov 2018 09:11:17 +0000
Subject: [PATCH 06/37] Test concatenation along a new dimension

---
 xarray/tests/test_combine.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index f7372165eb2..f171f998a7e 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -482,13 +482,14 @@ def test_get_tile_ids(self, create_combined_ids):
             expected_new_tile_id = tile_id[1:]
             assert _new_tile_id(combined) == expected_new_tile_id

-    def test_concat_once(self, create_combined_ids):
+    @pytest.mark.parametrize("concat_dim", ['dim1', 'new_dim'])
+    def test_concat_once(self, create_combined_ids, concat_dim):
         shape = (2,)
         combined_ids = _create_combined_ids(shape)
         ds = create_test_data
-        result = _concat_all_along_first_dim(combined_ids, 'dim1')
+        result = _concat_all_along_first_dim(combined_ids, dim=concat_dim)

-        expected_ds = concat([ds(0), ds(1)], 'dim1')
+        expected_ds = concat([ds(0), ds(1)], dim=concat_dim)
         assert_combined_tile_ids_equal(result, {(): expected_ds})

     def test_concat_twice(self, create_combined_ids):

From 55bf6853bb41e5c8de1da749859acd9e9aa1e0b9 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Fri, 9 Nov 2018 09:14:32 +0000
Subject: [PATCH 07/37] Started generalising auto_combine to N-D by integrating the N-D concatenation algorithm

---
 xarray/core/combine.py | 131 ++++++++++++++++++++++++++++++-----------
 1 file changed, 97 insertions(+), 34 deletions(-)

diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index 4fa98a70858..b7de0d15e0d 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -367,6 +367,30 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'):
     return concat(datasets, dim=dim, data_vars=data_vars, coords=coords)


+_CONCAT_DIM_DEFAULT = '__infer_concat_dim__'
+
+
+def _infer_concat_order_from_nested_list(datasets, concat_dims):
+
+    # TODO check that datasets is a list containing multiple elements
+
+    combined_ids = _infer_tile_ids_from_nested_list(datasets, [], {})
+
+    # Currently if concat_dims is not supplied then _auto_concat attempts to deduce it on every call
+    # TODO would be faster in this case to just work out the concat_dims once here
+    tile_id, ds = combined_ids[0]
+    n_dims = len(tile_id)
+    if concat_dims is None:
+        concat_dims = [_CONCAT_DIM_DEFAULT]*n_dims
+    else:
+        if len(concat_dims) != n_dims:
+            raise ValueError("concat_dims is of length " + str(len(concat_dims))
+                             + " but the datasets passed are nested in a "
+                             + str(n_dims) + "-dimensional structure")
+
+    return concat_dims, combined_ids
+
+
 def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids):
     """
     Given a list of lists (of lists...) 
of datasets, returns a dictionary @@ -435,9 +459,15 @@ def _check_shape_tile_ids(combined_tile_ids): assert isinstance(v, Dataset) -def _concat_nd(combined_IDs, concat_dims): +def _data_vars(combined_id): + id, ds = combined_id + return tuple(sorted(ds.data_vars)) + + +def _combine_nd(combined_IDs, concat_dims, data_vars='all', + coords='different', compat='no_conflicts'): """ - Recursively concatenates an N-dimensional structure of datasets. + Concatenates and merges an N-dimensional structure of datasets. No checks are performed on the consistency of the datasets, concat_dims or tile_IDs, because it is assumed that this has already been done. @@ -446,31 +476,34 @@ def _concat_nd(combined_IDs, concat_dims): ---------- combined_IDs : Dict[Tuple[int, ...]], xarray.Dataset] Structure containing all datasets to be concatenated with "tile_IDs" as - keys, which specify position within the desired final concatenated result. + keys, which specify position within the desired final combined result. concat_dims : sequence of str + The dimensions along which the datasets should be concatenated. Must be + in order, and the length must match Returns ------- """ - for dim in concat_dims: - combined_IDs = _concat_all_along_first_dim(combined_IDs, dim) - - combined_ds = combined_IDs[()] + # Organise by data variables + grouped_by_data_vars = itertoolz.groupby(_data_vars, + combined_IDs.items()).values() + concatenated_datasets = [] + for tiled_datasets in grouped_by_data_vars: + concatenated_ids = tiled_datasets - return combined_ds + # Perform N-D dimensional concatenation + for concat_dim in concat_dims: + dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim + concatenated_ids = _concat_along_first_dim(concatenated_ids, + dim=dim, + data_vars=data_vars, + coords=coords) + concatenated_datasets.append(concatenated_ids.values()) -def _concat_all_along_first_dim(combined_IDs, dim): - grouped = itertoolz.groupby(_new_tile_id, combined_IDs.items()) - new_combined_IDs = {} - - # TODO Would there be any point in parallelizing this concatenation step? - for new_ID, group in grouped.items(): - to_concat = [ds for old_ID, ds in group] - new_combined_IDs[new_ID] = concat(to_concat, dim) - return new_combined_IDs + return merge(concatenated_datasets, compat=compat) def _new_tile_id(single_id_ds_pair): @@ -479,13 +512,25 @@ def _new_tile_id(single_id_ds_pair): return tile_id[1:] -_CONCAT_DIM_DEFAULT = '__infer_concat_dim__' +def _concat_along_first_dim(combined_IDs, dim, data_vars='all', + coords='different'): + grouped = itertoolz.groupby(_new_tile_id, combined_IDs.items()) + new_combined_IDs = {} + + # TODO Would there be any point in parallelizing this concatenation step? + for new_ID, group in grouped.items(): + to_concat = [ds for old_ID, ds in group] + new_combined_IDs[new_ID] = _auto_concat(to_concat, dim=dim, + data_vars=data_vars, + coords=coords) + return new_combined_IDs def auto_combine(datasets, - concat_dim=_CONCAT_DIM_DEFAULT, + concat_dims=_CONCAT_DIM_DEFAULT, compat='no_conflicts', - data_vars='all', coords='different'): + data_vars='all', coords='different', + infer_order_from_coords=True): """Attempt to auto-magically combine the given datasets into one. This method attempts to combine a list of datasets into a single entity by @@ -504,10 +549,10 @@ def auto_combine(datasets, ---------- datasets : sequence of xarray.Dataset Dataset objects to merge. 
- concat_dim : str or DataArray or Index, optional - Dimension along which to concatenate variables, as used by + concat_dims : list of str or DataArray or Index, optional + Dimensions along which to concatenate variables, as used by :py:func:`xarray.concat`. You only need to provide this argument if - the dimension along which you want to concatenate is not a dimension + the dimensions along which you want to concatenate is not a dimension in the original datasets, e.g., if you want to stack a collection of 2D arrays along a third dimension. By default, xarray attempts to infer this argument by examining @@ -528,8 +573,14 @@ def auto_combine(datasets, of all non-null values. data_vars : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat - coords : {'minimal', 'different', 'all' o list of str}, optional + coords : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat + infer_order_from_coords : bool, optional + If true attempt to deduce the order in which the datasets should be + concatenated from their coordinates. To do this the coordinates should + be monotonic along the dimension to be concatenated. + If false instead read the order from the structure the datasets are + supplied in. This structure should be a nested list of lists. Returns ------- @@ -540,15 +591,27 @@ def auto_combine(datasets, concat Dataset.merge """ - from toolz import itertoolz - if concat_dim is not None: - dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - grouped = itertoolz.groupby(lambda ds: tuple(sorted(ds.data_vars)), - datasets).values() - concatenated = [_auto_concat(ds, dim=dim, - data_vars=data_vars, coords=coords) - for ds in grouped] + if concat_dims is not None: + + # TODO this could be where we would optionally check alignment, as in #2039 + + # Organise datasets in concatentation order in N-D + if infer_order_from_coords: + # TODO Use coordinates to determine tile_ID for each dataset in N-D + # i.e. (shoyer's (1) from discussion in #2159) + raise NotImplementedError + else: + # Determine tile_IDs by structure of input in N-D (i.e. 
ordering in list-of-lists) + concat_dims, combined_ids = _infer_concat_order_from_nested_list(datasets, concat_dims) + + # Check that the combined_ids are sensible + _check_shape_tile_ids(combined_ids) + + # Repeatedly concatenate then merge along each dimension + combined = _combine_nd(combined_ids, concat_dims, compat=compat, + data_vars=data_vars, coords=coords) else: + # Case of no concatenation wanted concatenated = datasets - merged = merge(concatenated, compat=compat) - return merged + combined = merge(concatenated, compat=compat) + return combined From 845206c96da16e2dd44e04f2b76450904c17965f Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Nov 2018 18:58:07 +0000 Subject: [PATCH 08/37] All unit tests now passing --- xarray/backends/api.py | 6 +- xarray/core/combine.py | 87 +++++++++++++-------------- xarray/tests/test_backends.py | 4 +- xarray/tests/test_combine.py | 107 ++++++++++++++++++++++++---------- 4 files changed, 120 insertions(+), 84 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index ca440872d73..d3c5efb6ec2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -480,7 +480,7 @@ def close(self): _CONCAT_DIM_DEFAULT = '__infer_concat_dim__' -def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, +def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, compat='no_conflicts', preprocess=None, engine=None, lock=None, data_vars='all', coords='different', autoclose=None, parallel=False, **kwargs): @@ -620,11 +620,11 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, # close datasets in case of a ValueError try: - if concat_dim is _CONCAT_DIM_DEFAULT: + if concat_dims is _CONCAT_DIM_DEFAULT: combined = auto_combine(datasets, compat=compat, data_vars=data_vars, coords=coords) else: - combined = auto_combine(datasets, concat_dim=concat_dim, + combined = auto_combine(datasets, concat_dims=concat_dims, compat=compat, data_vars=data_vars, coords=coords) except ValueError: diff --git a/xarray/core/combine.py b/xarray/core/combine.py index b7de0d15e0d..67b514c6e3c 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -372,23 +372,21 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'): def _infer_concat_order_from_nested_list(datasets, concat_dims): - # TODO check that datasets is a list containing multiple elements - combined_ids = _infer_tile_ids_from_nested_list(datasets, [], {}) # Currently if concat_dims is not supplied then _auto_concat attempts to deduce it on every call - # TODO would be faster in this case to just work out the concat_dims once here - tile_id, ds = combined_ids[0] + # TODO might be faster in this case to just work out the concat_dims once here + tile_id, ds = list(combined_ids.items())[0] n_dims = len(tile_id) - if concat_dims is None: + if concat_dims is None or concat_dims == _CONCAT_DIM_DEFAULT: concat_dims = [_CONCAT_DIM_DEFAULT]*n_dims else: if len(concat_dims) != n_dims: - raise ValueError("concat_dims is of length " + str(len(concat_dims)) + raise ValueError("concat_dims has length " + str(len(concat_dims)) + " but the datasets passed are nested in a " + str(n_dims) + "-dimensional structure") - return concat_dims, combined_ids + return combined_ids, concat_dims def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids): @@ -396,8 +394,8 @@ def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids): Given a list of lists (of lists...) 
of datasets, returns a dictionary with the index of each dataset in the nested list structure as the key. - Recursively traverses the given structure, while keeping track of the current - position. + Recursively traverses the given structure, while keeping track of the + current position. Parameters ---------- @@ -413,17 +411,13 @@ def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids): from .dataset import Dataset if isinstance(entry, list): - # Check if list is redundant - if len(entry) == 1: - raise TypeError('Redundant list nesting at ' - 'position ' + str(tuple(current_pos))) - - # Dive down tree + # Dive down tree and recursively open the next list current_pos.append(0) for i, item in enumerate(entry): current_pos[-1] = i - combined_tile_ids = _infer_tile_ids_from_nested_list(item, current_pos, - combined_tile_ids) + combined_tile_ids = _infer_tile_ids_from_nested_list\ + (item, current_pos, combined_tile_ids) + # Move back up tree del current_pos[-1] return combined_tile_ids @@ -435,11 +429,13 @@ def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids): else: raise TypeError("Element at position " + str(tuple(current_pos)) + - " is neither a list nor an xarray.Dataset") + " is of type " + str(type(entry)) + ", which is " + "neither a list nor an xarray.Dataset") def _check_shape_tile_ids(combined_tile_ids): # TODO create custom exception class instead of using asserts? + # Is this function even necessary? tile_ids = combined_tile_ids.keys() @@ -447,12 +443,6 @@ def _check_shape_tile_ids(combined_tile_ids): lengths = [len(id) for id in tile_ids] assert set(lengths) == {lengths[0]} - # Check each dimension has indices 0 to n represented with no gaps - for dim in range(lengths[0]): - indices = [id[dim] for id in tile_ids] - assert len(indices) > 1 - assert sorted(indices) == range(max(indices)) - # Check only datasets are contained from .dataset import Dataset for v in combined_tile_ids.values(): @@ -489,11 +479,16 @@ def _combine_nd(combined_IDs, concat_dims, data_vars='all', # Organise by data variables grouped_by_data_vars = itertoolz.groupby(_data_vars, combined_IDs.items()).values() + concatenated_datasets = [] - for tiled_datasets in grouped_by_data_vars: - concatenated_ids = tiled_datasets + for tiled_datasets_group in grouped_by_data_vars: + + # Convert list of tuples back into a dictionary + concatenated_ids = dict(tiled_datasets_group) # Perform N-D dimensional concatenation + # Each iteration of this loop reduces the length of the tile_IDs tuples + # by one. It always removes the first for concat_dim in concat_dims: dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim @@ -501,13 +496,13 @@ def _combine_nd(combined_IDs, concat_dims, data_vars='all', dim=dim, data_vars=data_vars, coords=coords) - concatenated_datasets.append(concatenated_ids.values()) - + concatenated_datasets = concatenated_datasets \ + + list(concatenated_ids.values()) return merge(concatenated_datasets, compat=compat) def _new_tile_id(single_id_ds_pair): - # probably replace with something like lambda x: x[0][1:] + # TODO maybe replace with something like lambda x: x[0][1:]? tile_id, ds = single_id_ds_pair return tile_id[1:] @@ -530,20 +525,12 @@ def auto_combine(datasets, concat_dims=_CONCAT_DIM_DEFAULT, compat='no_conflicts', data_vars='all', coords='different', - infer_order_from_coords=True): + infer_order_from_coords=False): """Attempt to auto-magically combine the given datasets into one. 
- This method attempts to combine a list of datasets into a single entity by - inspecting metadata and using a combination of concat and merge. - - It does not concatenate along more than one dimension or sort data under - any circumstances. It does align coordinates, but different variables on - datasets can cause it to fail under some scenarios. In complex cases, you - may need to clean up your data and use ``concat``/``merge`` explicitly. - - ``auto_combine`` works well if you have N years of data and M data - variables, and each combination of a distinct time period and set of data - variables is saved its own dataset. + This method attempts to combine a list (or nested list of lists) of + datasets into a single entity by inspecting metadata and using a + combination of concat and merge. Parameters ---------- @@ -552,9 +539,9 @@ def auto_combine(datasets, concat_dims : list of str or DataArray or Index, optional Dimensions along which to concatenate variables, as used by :py:func:`xarray.concat`. You only need to provide this argument if - the dimensions along which you want to concatenate is not a dimension - in the original datasets, e.g., if you want to stack a collection of - 2D arrays along a third dimension. + any of the dimensions along which you want to concatenate are not a + dimension in the original datasets, e.g., if you want to stack a + collection of 2D arrays along a third dimension. By default, xarray attempts to infer this argument by examining component files. Set ``concat_dim=None`` explicitly to disable concatenation. @@ -591,18 +578,24 @@ def auto_combine(datasets, concat Dataset.merge """ + + # TODO perform some of the checks from _calc_concat_dim_coord on concat_dims here? + if concat_dims is not None: - # TODO this could be where we would optionally check alignment, as in #2039 + # TODO this could be where we would optionally check alignment, as in #2039? # Organise datasets in concatentation order in N-D if infer_order_from_coords: # TODO Use coordinates to determine tile_ID for each dataset in N-D # i.e. (shoyer's (1) from discussion in #2159) raise NotImplementedError + # Once this is implemented I think it should be the default else: - # Determine tile_IDs by structure of input in N-D (i.e. ordering in list-of-lists) - concat_dims, combined_ids = _infer_concat_order_from_nested_list(datasets, concat_dims) + # Determine tile_IDs by structure of input in N-D + # (i.e. 
ordering in list-of-lists) + combined_ids, concat_dims = _infer_concat_order_from_nested_list\ + (datasets, concat_dims) # Check that the combined_ids are sensible _check_shape_tile_ids(combined_ids) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index fb9c43c0165..0edc7db4980 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2234,7 +2234,7 @@ def test_open_mfdataset_concat_dim_none(self): data = Dataset({'x': 0}) data.to_netcdf(tmp1) Dataset({'x': np.nan}).to_netcdf(tmp2) - with open_mfdataset([tmp1, tmp2], concat_dim=None) as actual: + with open_mfdataset([tmp1, tmp2], concat_dims=None) as actual: assert_identical(data, actual) def test_open_dataset(self): @@ -2261,7 +2261,7 @@ def test_open_single_dataset(self): {'baz': [100]}) with create_tmp_file() as tmp: original.to_netcdf(tmp) - with open_mfdataset([tmp], concat_dim=dim) as actual: + with open_mfdataset([tmp], concat_dims=[dim]) as actual: assert_identical(expected, actual) def test_dask_roundtrip(self): diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index f171f998a7e..8b6912e98d5 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -10,8 +10,9 @@ from xarray import DataArray, Dataset, Variable, auto_combine, concat from xarray.core.pycompat import OrderedDict, iteritems from xarray.core.combine import ( - _new_tile_id, _concat_all_along_first_dim, - _infer_tile_ids_from_nested_list, _check_shape_tile_ids, _concat_nd) + _new_tile_id, _concat_along_first_dim, + _infer_concat_order_from_nested_list, _infer_tile_ids_from_nested_list, + _check_shape_tile_ids, _combine_nd) from . import ( InaccessibleArray, assert_array_equal, assert_equal, assert_identical, @@ -308,6 +309,9 @@ def test_auto_combine(self): expected = Dataset({'x': [0, 1]}) assert_identical(expected, actual) + actual = auto_combine(actual) + assert_identical(expected, actual) + actual = auto_combine([actual]) assert_identical(expected, actual) @@ -354,7 +358,7 @@ def test_auto_combine_previously_failed(self): expected = Dataset({'a': (('t', 'x'), [[np.nan, 2, 3], [1, 2, np.nan]])}, {'x': [0, 1, 2]}) - actual = auto_combine(datasets, concat_dim='t') + actual = auto_combine(datasets, concat_dims=['t']) assert_identical(expected, actual) @requires_dask # only for toolz @@ -379,14 +383,14 @@ def test_auto_combine_no_concat(self): assert_identical(expected, actual) data = Dataset({'x': 0}) - actual = auto_combine([data, data, data], concat_dim=None) + actual = auto_combine([data, data, data], concat_dims=None) assert_identical(data, actual) # Single object, with a concat_dim explicitly provided # Test the issue reported in GH #1988 objs = [Dataset({'x': 0, 'y': 1})] dim = DataArray([100], name='baz', dims='baz') - actual = auto_combine(objs, concat_dim=dim) + actual = auto_combine(objs, concat_dims=[dim]) expected = Dataset({'x': ('baz', [0]), 'y': ('baz', [1])}, {'baz': [100]}) assert_identical(expected, actual) @@ -395,7 +399,7 @@ def test_auto_combine_no_concat(self): # expected for non-scalar values, too. 
objs = [Dataset({'x': ('z', [0, 1]), 'y': ('z', [1, 2])})] dim = DataArray([100], name='baz', dims='baz') - actual = auto_combine(objs, concat_dim=dim) + actual = auto_combine(objs, concat_dims=[dim]) expected = Dataset({'x': (('baz', 'z'), [[0, 1]]), 'y': (('baz', 'z'), [[1, 2]])}, {'baz': [100]}) @@ -435,20 +439,41 @@ def test_3d(self): actual = _infer_tile_ids_from_nested_list(input, [], {}) assert_combined_tile_ids_equal(expected, actual) - def test_redundant_nesting_gotcha(self): + def test_single_dataset(self): ds = create_test_data(0) - input = [[ds], [ds]] + input = [ds] - with pytest.raises(TypeError): - _infer_tile_ids_from_nested_list(input, [], {}) + expected = {(0,): ds} + actual = _infer_tile_ids_from_nested_list(input, [], {}) + assert_combined_tile_ids_equal(expected, actual) - def test_bad_element(self): + def test_redundant_nesting(self): + ds = create_test_data + input = [[ds(0)], [ds(1)]] + + expected = {(0, 0): ds(0), (1, 0): ds(1)} + actual = _infer_tile_ids_from_nested_list(input, [], {}) + assert_combined_tile_ids_equal(expected, actual) + + @pytest.mark.parametrize("bad_element", ['a', 2]) + def test_bad_element(self, bad_element): ds = create_test_data(0) - input = [ds, 'bad_element'] - with pytest.raises(TypeError): + input = [ds, bad_element] + with raises_regex(TypeError, 'Element at position .* is of type .*, ' + 'which is neither a list nor an ' + 'xarray.Dataset'): _infer_tile_ids_from_nested_list(input, [], {}) + def test_ignore_empty_list(self): + ds = create_test_data(0) + input = [ds, []] + expected = {(0,): ds} + actual = _infer_tile_ids_from_nested_list(input, [], {}) + assert_combined_tile_ids_equal(expected, actual) + def test_ragged_input(self): + # Auto_combine won't work on ragged input + # but this is just to increase test coverage ds = create_test_data input = [ds(0), [ds(1), ds(2)]] @@ -456,6 +481,15 @@ def test_ragged_input(self): actual = _infer_tile_ids_from_nested_list(input, [], {}) assert_combined_tile_ids_equal(expected, actual) + def test_infer_from_datasets(self): + ds = create_test_data + input = [ds(0), ds(1)] + + expected = {(0,): ds(0), (1,): ds(1)} + actual, concat_dims = _infer_concat_order_from_nested_list\ + (input, ['dim1']) + assert_combined_tile_ids_equal(expected, actual) + @pytest.fixture(scope='module') def create_combined_ids(): @@ -473,10 +507,10 @@ def _create_tile_ids(shape): return list(tile_ids) -class TestConcatND(object): - def test_get_tile_ids(self, create_combined_ids): +class TestCombineND(object): + def test_get_new_tile_ids(self, create_combined_ids): shape = (1, 2, 3) - combined_ids = _create_combined_ids(shape) + combined_ids = create_combined_ids(shape) for combined, tile_id in zip(combined_ids.items(), _create_tile_ids(shape)): expected_new_tile_id = tile_id[1:] @@ -485,17 +519,17 @@ def test_get_tile_ids(self, create_combined_ids): @pytest.mark.parametrize("concat_dim", ['dim1', 'new_dim']) def test_concat_once(self, create_combined_ids, concat_dim): shape = (2,) - combined_ids = _create_combined_ids(shape) + combined_ids = create_combined_ids(shape) ds = create_test_data - result = _concat_all_along_first_dim(combined_ids, dim=concat_dim) + result = _concat_along_first_dim(combined_ids, dim=concat_dim) expected_ds = concat([ds(0), ds(1)], dim=concat_dim) assert_combined_tile_ids_equal(result, {(): expected_ds}) def test_concat_twice(self, create_combined_ids): shape = (2, 3) - combined_ids = _create_combined_ids(shape) - result = _concat_nd(combined_ids, concat_dims=['dim1', 'dim2']) + combined_ids = 
create_combined_ids(shape) + result = _combine_nd(combined_ids, concat_dims=['dim1', 'dim2']) ds = create_test_data partway1 = concat([ds(0), ds(3)], dim='dim1') @@ -513,19 +547,28 @@ def test_check_lengths(self): with pytest.raises(AssertionError): _check_shape_tile_ids(combined_tile_ids) - def test_check_non_zero_length_along_all_dims(self): - ds = create_test_data(0) - combined_tile_ids = {(0, 0): ds, (1, 0): ds} - with pytest.raises(AssertionError): - _check_shape_tile_ids(combined_tile_ids) - - def test_check_linearity(self): - ds = create_test_data(0) - combined_tile_ids = {(0,): ds, (2,): ds} - with pytest.raises(AssertionError): - _check_shape_tile_ids(combined_tile_ids) - def test_check_contains_datasets(self): combined_tile_ids = {(0,): 'a', (1,): 'b'} with pytest.raises(AssertionError): _check_shape_tile_ids(combined_tile_ids) + + +class TestAutoCombineND(object): + # TODO there should be a lot more tests in here testing different cases + + def test_auto_combine_2d(self): + ds = create_test_data + + partway1 = concat([ds(0), ds(3)], dim='dim1') + partway2 = concat([ds(1), ds(4)], dim='dim1') + partway3 = concat([ds(2), ds(5)], dim='dim1') + expected = concat([partway1, partway2, partway3], dim='dim2') + + datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]] + result = auto_combine(datasets, concat_dims=['dim1', 'dim2']) + + assert_equal(result, expected) + + def test_ragged_input(self): + # TODO should throw an informative error if you try this + ... From f4e9aad81f154d5edbc0fc422678d6fa9025c0cf Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 10 Nov 2018 12:53:12 +0000 Subject: [PATCH 09/37] Fixed a failing test which I didn't notice because I don't have pseudoNetCDF --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0edc7db4980..d9109c4b67f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2625,7 +2625,7 @@ def test_uamiv_format_mfread(self): ['example.uamiv', 'example.uamiv'], engine='pseudonetcdf', - concat_dim='TSTEP', + concat_dims=['TSTEP'], backend_kwargs={'format': 'uamiv'}) data1 = np.arange(20, dtype='f').reshape(1, 1, 4, 5) From 00004a14304d461c8784fca75a4c848cccbc7011 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 14 Nov 2018 12:44:00 +0000 Subject: [PATCH 10/37] Began updating open_mfdataset to handle N-D input --- xarray/backends/api.py | 44 ++++++++++++++++++++++++++++++++++-------- xarray/core/combine.py | 11 ++++++----- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index d3c5efb6ec2..89aa989624b 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -10,7 +10,8 @@ from .. import Dataset, backends, conventions from ..core import indexing -from ..core.combine import auto_combine +from ..core.combine import (_infer_concat_order_from_positions, _combine_nd,\ + _check_shape_tile_ids, merge) from ..core.pycompat import basestring, path_type from ..core.utils import close_on_error, is_remote_uri, is_grib_path from .common import ArrayWriter @@ -483,6 +484,7 @@ def close(self): def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, compat='no_conflicts', preprocess=None, engine=None, lock=None, data_vars='all', coords='different', + infer_order_from_coords=False, autoclose=None, parallel=False, **kwargs): """Open multiple files as a single dataset. 
@@ -502,7 +504,7 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, By default, chunks will be chosen to load entire input files into memory at once. This has a major impact on performance: please see the full documentation for more details [2]. - concat_dim : None, str, DataArray or Index, optional + concat_dims : None, str, DataArray or Index, optional Dimension to concatenate files along. This argument is passed on to :py:func:`xarray.auto_combine` along with the dataset objects. You only need to provide this argument if the dimension along which you want to @@ -561,6 +563,12 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, those corresponding to other dimensions. * list of str: The listed coordinate variables will be concatenated, in addition the 'minimal' coordinates. + infer_order_from_coords : bool, optional + If true attempt to deduce the order in which the datasets should be + concatenated from their coordinates. To do this the coordinates should + be monotonic along the dimension to be concatenated. + If false instead read the order from the structure the datasets are + supplied in. This structure should be a nested list of lists. parallel : bool, optional If True, the open and preprocess steps of this function will be performed in parallel using ``dask.delayed``. Default is False. @@ -594,6 +602,11 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, if not paths: raise IOError('no files to open') + # If infer_order_from_coords=True then this is uneccessary, but quick as + # it will just loop over one list + combined_ids_paths, concat_dims = _infer_concat_order_from_positions(paths, concat_dims) # Use an OrderedDict? + ids, paths = list(combined_ids_paths.keys()), list(combined_ids_paths.values()) # Is this in order?? 
+ open_kwargs = dict(engine=engine, chunks=chunks or {}, lock=lock, autoclose=autoclose, **kwargs) @@ -620,13 +633,28 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, # close datasets in case of a ValueError try: - if concat_dims is _CONCAT_DIM_DEFAULT: - combined = auto_combine(datasets, compat=compat, - data_vars=data_vars, coords=coords) + # TODO refactor this section to avoid duplicating any logic with auto_combine + if concat_dims is not None: + # Arrange datasets for concatenation + if infer_order_from_coords: + # Use coordinates to determine tile_ID for each dataset in N-D + # Ignore how they were ordered previously + raise NotImplementedError + # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, concat_dims) + else: + # Already sorted so just use the ids already determined from the input shape + combined_ids = dict(zip(ids, datasets)) + + # Check that the combined_ids are sensible + _check_shape_tile_ids(combined_ids) + + # Repeatedly concatenate then merge along each dimension + combined = _combine_nd(combined_ids, concat_dims, compat=compat, + data_vars=data_vars, coords=coords) else: - combined = auto_combine(datasets, concat_dims=concat_dims, - compat=compat, - data_vars=data_vars, coords=coords) + # Case of no concatenation wanted + concatenated = datasets + combined = merge(concatenated, compat=compat) except ValueError: for ds in datasets: ds.close() diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 67b514c6e3c..a05d0ac31ee 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -370,7 +370,7 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'): _CONCAT_DIM_DEFAULT = '__infer_concat_dim__' -def _infer_concat_order_from_nested_list(datasets, concat_dims): +def _infer_concat_order_from_positions(datasets, concat_dims): combined_ids = _infer_tile_ids_from_nested_list(datasets, [], {}) @@ -585,24 +585,25 @@ def auto_combine(datasets, # TODO this could be where we would optionally check alignment, as in #2039? - # Organise datasets in concatentation order in N-D + # Arrange datasets for concatenation if infer_order_from_coords: # TODO Use coordinates to determine tile_ID for each dataset in N-D # i.e. (shoyer's (1) from discussion in #2159) raise NotImplementedError # Once this is implemented I think it should be the default + # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, concat_dims) else: # Determine tile_IDs by structure of input in N-D # (i.e. 
ordering in list-of-lists) - combined_ids, concat_dims = _infer_concat_order_from_nested_list\ - (datasets, concat_dims) + combined_ids, concat_dims = _infer_concat_order_from_positions \ + (datasets, concat_dims) # Check that the combined_ids are sensible _check_shape_tile_ids(combined_ids) # Repeatedly concatenate then merge along each dimension combined = _combine_nd(combined_ids, concat_dims, compat=compat, - data_vars=data_vars, coords=coords) + data_vars=data_vars, coords=coords) else: # Case of no concatenation wanted concatenated = datasets From b41e37494dea8224c544b04533222c9bfdc7f6a6 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 14 Nov 2018 13:58:45 +0000 Subject: [PATCH 11/37] Refactored to remove duplicate logic in open_mfdataset & auto_combine --- xarray/backends/api.py | 42 +++++++++--------------- xarray/core/combine.py | 73 +++++++++++++++++++++++++----------------- 2 files changed, 59 insertions(+), 56 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 89aa989624b..672e43e9e58 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -10,8 +10,7 @@ from .. import Dataset, backends, conventions from ..core import indexing -from ..core.combine import (_infer_concat_order_from_positions, _combine_nd,\ - _check_shape_tile_ids, merge) +from ..core.combine import _infer_concat_order_from_positions, _auto_combine from ..core.pycompat import basestring, path_type from ..core.utils import close_on_error, is_remote_uri, is_grib_path from .common import ArrayWriter @@ -602,8 +601,10 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, if not paths: raise IOError('no files to open') - # If infer_order_from_coords=True then this is uneccessary, but quick as - # it will just loop over one list + # If infer_order_from_coords=True then this is uneccessary, but that's fine + # as it should be quick - in this case it will just loop over one list + # If infer_order_from_coords=False then this creates a flat list which is + # easier to iterate over, while saving the originally-supplied structure combined_ids_paths, concat_dims = _infer_concat_order_from_positions(paths, concat_dims) # Use an OrderedDict? ids, paths = list(combined_ids_paths.keys()), list(combined_ids_paths.values()) # Is this in order?? 
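At this point the nested list of paths has been flattened into paired ids and paths lists, and the datasets opened from those paths are later zipped back onto their tile IDs. A minimal standalone sketch of that round trip (illustrative only: plain strings stand in for file paths, dicts stand in for opened datasets, and the helper below mirrors, but is not, the patched traversal code):

    def tile_ids_from_nested_list(entry, current_pos=()):
        # Walk a nested list of arbitrary depth, yielding (tile_id, leaf)
        # pairs, where tile_id records the position of each leaf
        if isinstance(entry, list):
            for i, item in enumerate(entry):
                for pair in tile_ids_from_nested_list(item, current_pos + (i,)):
                    yield pair
        else:
            yield current_pos, entry

    # A 2x2 grid of hypothetical file names
    paths = [['up_left.nc', 'up_right.nc'],
             ['down_left.nc', 'down_right.nc']]

    combined_ids_paths = dict(tile_ids_from_nested_list(paths))
    ids = list(combined_ids_paths.keys())          # [(0, 0), (0, 1), (1, 0), (1, 1)]
    flat_paths = list(combined_ids_paths.values())

    # Stand-in for opening each file; in open_mfdataset these become Datasets
    datasets = [{'source': p} for p in flat_paths]

    # Re-attach each dataset to its position in the hypercube
    combined_ids = dict(zip(ids, datasets))
    assert combined_ids[(1, 0)] == {'source': 'down_left.nc'}

As for the "# Is this in order??" comment above: for a dict that is not modified in between, Python does guarantee that keys() and values() iterate in corresponding order, so the zip re-pairs each dataset with the tile ID inferred from its path.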
@@ -631,30 +632,17 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, # the underlying datasets will still be stored as dask arrays datasets, file_objs = dask.compute(datasets, file_objs) - # close datasets in case of a ValueError + # Close datasets in case of a ValueError try: - # TODO refactor this section to avoid duplicating any logic with auto_combine - if concat_dims is not None: - # Arrange datasets for concatenation - if infer_order_from_coords: - # Use coordinates to determine tile_ID for each dataset in N-D - # Ignore how they were ordered previously - raise NotImplementedError - # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, concat_dims) - else: - # Already sorted so just use the ids already determined from the input shape - combined_ids = dict(zip(ids, datasets)) - - # Check that the combined_ids are sensible - _check_shape_tile_ids(combined_ids) - - # Repeatedly concatenate then merge along each dimension - combined = _combine_nd(combined_ids, concat_dims, compat=compat, - data_vars=data_vars, coords=coords) - else: - # Case of no concatenation wanted - concatenated = datasets - combined = merge(concatenated, compat=compat) + if infer_order_from_coords: + # Discard ordering because it should be redone from coordinates + ids = False + + combined = _auto_combine(datasets, concat_dims=concat_dims, + compat=compat, + data_vars=data_vars, coords=coords, + infer_order_from_coords=infer_order_from_coords, + ids=ids) except ValueError: for ds in datasets: ds.close() diff --git a/xarray/core/combine.py b/xarray/core/combine.py index a05d0ac31ee..caa6d805232 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -512,7 +512,6 @@ def _concat_along_first_dim(combined_IDs, dim, data_vars='all', grouped = itertoolz.groupby(_new_tile_id, combined_IDs.items()) new_combined_IDs = {} - # TODO Would there be any point in parallelizing this concatenation step? for new_ID, group in grouped.items(): to_concat = [ds for old_ID, ds in group] new_combined_IDs[new_ID] = _auto_concat(to_concat, dim=dim, @@ -521,6 +520,45 @@ def _concat_along_first_dim(combined_IDs, dim, data_vars='all', return new_combined_IDs +def _auto_combine(datasets, concat_dims, compat, data_vars, coords, + infer_order_from_coords, ids): + """ + This function decides if any concatenation is necessary, and if so it calls + the logic to decide their concatenation order before concatenating. + """ + + if concat_dims is not None: + # Arrange datasets for concatenation + if infer_order_from_coords: + # Use coordinates to determine tile_ID for each dataset in N-D + # Ignore how they were ordered previously + raise NotImplementedError + # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, concat_dims) + else: + # Use information from the shape of the user input + if not ids: + # Determine tile_IDs by structure of input in N-D + # (i.e. 
ordering in list-of-lists) + combined_ids, concat_dims = _infer_concat_order_from_positions\ + (datasets, concat_dims) + else: + # Already sorted so just use the ids already passed + combined_ids = dict(zip(ids, datasets)) + + # Check that the combined_ids are sensible + _check_shape_tile_ids(combined_ids) + + # Repeatedly concatenate then merge along each dimension + combined = _combine_nd(combined_ids, concat_dims, compat=compat, + data_vars=data_vars, coords=coords) + else: + # Case of no concatenation wanted + concatenated = datasets + combined = merge(concatenated, compat=compat) + + return combined + + def auto_combine(datasets, concat_dims=_CONCAT_DIM_DEFAULT, compat='no_conflicts', @@ -581,31 +619,8 @@ def auto_combine(datasets, # TODO perform some of the checks from _calc_concat_dim_coord on concat_dims here? - if concat_dims is not None: - - # TODO this could be where we would optionally check alignment, as in #2039? - - # Arrange datasets for concatenation - if infer_order_from_coords: - # TODO Use coordinates to determine tile_ID for each dataset in N-D - # i.e. (shoyer's (1) from discussion in #2159) - raise NotImplementedError - # Once this is implemented I think it should be the default - # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, concat_dims) - else: - # Determine tile_IDs by structure of input in N-D - # (i.e. ordering in list-of-lists) - combined_ids, concat_dims = _infer_concat_order_from_positions \ - (datasets, concat_dims) - - # Check that the combined_ids are sensible - _check_shape_tile_ids(combined_ids) - - # Repeatedly concatenate then merge along each dimension - combined = _combine_nd(combined_ids, concat_dims, compat=compat, - data_vars=data_vars, coords=coords) - else: - # Case of no concatenation wanted - concatenated = datasets - combined = merge(concatenated, compat=compat) - return combined + # The IDs argument tells _auto_combine that the datasets are not sorted + return _auto_combine(datasets, concat_dims=concat_dims, compat=compat, + data_vars=data_vars, coords=coords, + infer_order_from_coords=infer_order_from_coords, + ids=False) \ No newline at end of file From 8672a79833236a893aba85771c96b02db06008ef Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 14 Nov 2018 16:32:28 +0000 Subject: [PATCH 12/37] Implemented Shoyers suggestion in #2553 to rewrite the recursive nested list traverser as an iterator --- xarray/core/combine.py | 46 +++++++++++++----------------------- xarray/tests/test_combine.py | 27 +++++++-------------- 2 files changed, 25 insertions(+), 48 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index caa6d805232..66ece818470 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -372,7 +372,7 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'): def _infer_concat_order_from_positions(datasets, concat_dims): - combined_ids = _infer_tile_ids_from_nested_list(datasets, [], {}) + combined_ids = dict(_infer_tile_ids_from_nested_list(datasets, ())) # Currently if concat_dims is not supplied then _auto_concat attempts to deduce it on every call # TODO might be faster in this case to just work out the concat_dims once here @@ -389,48 +389,34 @@ def _infer_concat_order_from_positions(datasets, concat_dims): return combined_ids, concat_dims -def _infer_tile_ids_from_nested_list(entry, current_pos, combined_tile_ids): +def _infer_tile_ids_from_nested_list(entry, current_pos): """ - Given a list of lists (of lists...) 
of datasets, returns a dictionary
-    with the index of each dataset in the nested list structure as the key.
+    Given a list of lists (of lists...) of objects, returns an iterator
+    which returns a tuple containing the index of each object in the nested
+    list structure as the key, and the object. This can then be called by the
+    dict constructor to create a dictionary of the objects organised by their
+    position in the original nested list.

     Recursively traverses the given structure, while keeping track of the
-    current position.
+    current position. Should work for any type of object which isn't a list.

     Parameters
     ----------
-    entry : list[list[xarray.Dataset, xarray.Dataset, ...]]
-        List of lists of arbitrary depth, containing datasets in the order they
-        are to be concatenated.
+    entry : list[list[obj, obj, ...]]
+        List of lists of arbitrary depth, containing objects in the order
+        they are to be concatenated.

     Returns
     -------
-    combined_tile_ids : dict[tuple(int, ...), xarray.Dataset]
+    combined_tile_ids : dict[tuple(int, ...), obj]
     """

-    from .dataset import Dataset
-
     if isinstance(entry, list):
-        # Dive down tree and recursively open the next list
-        current_pos.append(0)
         for i, item in enumerate(entry):
-            current_pos[-1] = i
-            combined_tile_ids = _infer_tile_ids_from_nested_list\
-                                (item, current_pos, combined_tile_ids)
-
-        # Move back up tree
-        del current_pos[-1]
-        return combined_tile_ids
-
-    elif isinstance(entry, Dataset):
-        # Termination condition
-        combined_tile_ids[tuple(current_pos)] = entry
-        return combined_tile_ids
-
+            for result in _infer_tile_ids_from_nested_list(item, current_pos + (i,)):
+                yield result
     else:
-        raise TypeError("Element at position " + str(tuple(current_pos)) +
-                        " is of type " + str(type(entry)) + ", which is "
-                        "neither a list nor an xarray.Dataset")
+        yield current_pos, entry


 def _check_shape_tile_ids(combined_tile_ids):
@@ -619,7 +605,7 @@ def auto_combine(datasets,

     # TODO perform some of the checks from _calc_concat_dim_coord on concat_dims here?

-    # The IDs argument tells _auto_combine that the datasets are not sorted
+    # The IDs argument tells _auto_combine that the datasets are not yet sorted
     return _auto_combine(datasets, concat_dims=concat_dims, compat=compat,
                          data_vars=data_vars, coords=coords,
                          infer_order_from_coords=infer_order_from_coords,
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index 8b6912e98d5..0efeaeec30b 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -11,7 +11,7 @@ from xarray.core.pycompat import OrderedDict, iteritems
 from xarray.core.combine import (
     _new_tile_id, _concat_along_first_dim,
-    _infer_concat_order_from_nested_list, _infer_tile_ids_from_nested_list,
+    _infer_concat_order_from_positions, _infer_tile_ids_from_nested_list,
     _check_shape_tile_ids, _combine_nd)

 from .
import ( @@ -412,7 +412,7 @@ def test_1d(self): input = [ds(0), ds(1)] expected = {(0,): ds(0), (1,): ds(1)} - actual = _infer_tile_ids_from_nested_list(input, [], {}) + actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) def test_2d(self): @@ -422,7 +422,7 @@ def test_2d(self): expected = {(0, 0): ds(0), (0, 1): ds(1), (1, 0): ds(2), (1, 1): ds(3), (2, 0): ds(4), (2, 1): ds(5)} - actual = _infer_tile_ids_from_nested_list(input, [], {}) + actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) def test_3d(self): @@ -436,7 +436,7 @@ def test_3d(self): (1, 0, 0): ds(6), (1, 0, 1): ds(7), (1, 1, 0): ds(8), (1, 1, 1): ds(9), (1, 2, 0): ds(10), (1, 2, 1): ds(11)} - actual = _infer_tile_ids_from_nested_list(input, [], {}) + actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) def test_single_dataset(self): @@ -444,7 +444,7 @@ def test_single_dataset(self): input = [ds] expected = {(0,): ds} - actual = _infer_tile_ids_from_nested_list(input, [], {}) + actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) def test_redundant_nesting(self): @@ -452,23 +452,14 @@ def test_redundant_nesting(self): input = [[ds(0)], [ds(1)]] expected = {(0, 0): ds(0), (1, 0): ds(1)} - actual = _infer_tile_ids_from_nested_list(input, [], {}) + actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) - @pytest.mark.parametrize("bad_element", ['a', 2]) - def test_bad_element(self, bad_element): - ds = create_test_data(0) - input = [ds, bad_element] - with raises_regex(TypeError, 'Element at position .* is of type .*, ' - 'which is neither a list nor an ' - 'xarray.Dataset'): - _infer_tile_ids_from_nested_list(input, [], {}) - def test_ignore_empty_list(self): ds = create_test_data(0) input = [ds, []] expected = {(0,): ds} - actual = _infer_tile_ids_from_nested_list(input, [], {}) + actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) def test_ragged_input(self): @@ -478,7 +469,7 @@ def test_ragged_input(self): input = [ds(0), [ds(1), ds(2)]] expected = {(0,): ds(0), (1, 0): ds(1), (1, 1): ds(2)} - actual = _infer_tile_ids_from_nested_list(input, [], {}) + actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) def test_infer_from_datasets(self): @@ -486,7 +477,7 @@ def test_infer_from_datasets(self): input = [ds(0), ds(1)] expected = {(0,): ds(0), (1,): ds(1)} - actual, concat_dims = _infer_concat_order_from_nested_list\ + actual, concat_dims = _infer_concat_order_from_positions\ (input, ['dim1']) assert_combined_tile_ids_equal(expected, actual) From 4f56b240216849044d8d68c17e0ce88256393017 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 14 Nov 2018 16:33:08 +0000 Subject: [PATCH 13/37] --amend --- xarray/core/combine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 66ece818470..d724a8dfdce 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -413,7 +413,8 @@ def _infer_tile_ids_from_nested_list(entry, current_pos): if isinstance(entry, list): for i, item in enumerate(entry): - for result in _infer_tile_ids_from_nested_list(item, current_pos + (i,)): + for result in _infer_tile_ids_from_nested_list(item, + current_pos + (i,)): yield result else: yield 
current_pos, entry From 4cfaf2e1fb02dbb390cf64bbdfb37fca562afab9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 14 Nov 2018 19:38:26 +0000 Subject: [PATCH 14/37] Now raises ValueError if input not ordered correctly before concatenation --- xarray/core/combine.py | 59 +++++++++++++++++++++++++----------- xarray/tests/test_combine.py | 8 ++--- 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index d724a8dfdce..c0a264f6335 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -374,8 +374,9 @@ def _infer_concat_order_from_positions(datasets, concat_dims): combined_ids = dict(_infer_tile_ids_from_nested_list(datasets, ())) - # Currently if concat_dims is not supplied then _auto_concat attempts to deduce it on every call - # TODO might be faster in this case to just work out the concat_dims once here + # Currently if concat_dims is not supplied then _auto_concat attempts to + # deduce it on every call + # TODO might be faster in this case to just work out concat_dims once here tile_id, ds = list(combined_ids.items())[0] n_dims = len(tile_id) if concat_dims is None or concat_dims == _CONCAT_DIM_DEFAULT: @@ -420,20 +421,39 @@ def _infer_tile_ids_from_nested_list(entry, current_pos): yield current_pos, entry -def _check_shape_tile_ids(combined_tile_ids): - # TODO create custom exception class instead of using asserts? - # Is this function even necessary? - +def _check_shape_tile_ids(combined_tile_ids, contains='datasets'): tile_ids = combined_tile_ids.keys() - # Check all tuples are the same length - lengths = [len(id) for id in tile_ids] - assert set(lengths) == {lengths[0]} + # TODO cover all of these with separate unit tests - # Check only datasets are contained - from .dataset import Dataset - for v in combined_tile_ids.values(): - assert isinstance(v, Dataset) + # Check all tuples are the same length + # i.e. check that all lists are nested to the same depth + nesting_depths = [len(id) for id in tile_ids] + if not set(nesting_depths) == {nesting_depths[0]}: + raise ValueError("The supplied objects do not form a hypercube because" + " sub-lists do not have consistent depths") + + # Check objects form a hypercube + # i.e. check all lists along one dimension are same length, monotonically- + # increasing with no repetitions + for dim in range(nesting_depths[0]): + try: + indices_along_dim = [id[dim] for id in tile_ids] + except IndexError: + raise ValueError("The supplied objects do not form a hypercube " + "because sub-lists do not have consistent " + "lengths along dimension {}".format(str(dim))) + + # TODO work out if this actually means something is wrong + if not set(indices_along_dim) == indices_along_dim: + raise ValueError("The supplied objects do not form a hypercube " + "because there are repeated concatenation " + "positions along concatenation dimension " + "{}".format(str(dim))) + + if not sorted(indices_along_dim) == indices_along_dim: + raise ValueError("The supplied objects have not been successfully " + "ordered along dimension {}".format(str(dim))) def _data_vars(combined_id): @@ -473,6 +493,7 @@ def _combine_nd(combined_IDs, concat_dims, data_vars='all', # Convert list of tuples back into a dictionary concatenated_ids = dict(tiled_datasets_group) + # TODO refactor this logic, possibly using method in np.blocks # Perform N-D dimensional concatenation # Each iteration of this loop reduces the length of the tile_IDs tuples # by one. 
It always removes the first @@ -517,10 +538,12 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords, if concat_dims is not None: # Arrange datasets for concatenation if infer_order_from_coords: - # Use coordinates to determine tile_ID for each dataset in N-D - # Ignore how they were ordered previously raise NotImplementedError - # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, concat_dims) + # TODO Use coordinates to determine tile_ID for each dataset in N-D + # Ignore how they were ordered previously + # Shoould look like + # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, + # concat_dims) else: # Use information from the shape of the user input if not ids: @@ -604,10 +627,10 @@ def auto_combine(datasets, Dataset.merge """ - # TODO perform some of the checks from _calc_concat_dim_coord on concat_dims here? + # TODO do some of _calc_concat_dim_coord's checks on concat_dims here? # The IDs argument tells _auto_combine that the datasets are not yet sorted return _auto_combine(datasets, concat_dims=concat_dims, compat=compat, data_vars=data_vars, coords=coords, infer_order_from_coords=infer_order_from_coords, - ids=False) \ No newline at end of file + ids=False) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 0efeaeec30b..6aff75aefd4 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -532,15 +532,11 @@ def test_concat_twice(self, create_combined_ids): class TestCheckShapeTileIDs(object): + # TODO test all types of ValueErrors from _check_shape_tile_id def test_check_lengths(self): ds = create_test_data(0) combined_tile_ids = {(0,): ds, (0, 1): ds} - with pytest.raises(AssertionError): - _check_shape_tile_ids(combined_tile_ids) - - def test_check_contains_datasets(self): - combined_tile_ids = {(0,): 'a', (1,): 'b'} - with pytest.raises(AssertionError): + with pytest.raises(ValueError): _check_shape_tile_ids(combined_tile_ids) From 9fd1413692cec1cfd46c3898c413b0c16c822b8a Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 22 Nov 2018 10:20:45 +0000 Subject: [PATCH 15/37] Added some more prototype tests defining desired behaviour more clearly --- xarray/tests/test_combine.py | 43 +++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 6aff75aefd4..e129ccb041f 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -462,7 +462,7 @@ def test_ignore_empty_list(self): actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) - def test_ragged_input(self): + def test_uneven_depth_input(self): # Auto_combine won't work on ragged input # but this is just to increase test coverage ds = create_test_data @@ -472,6 +472,17 @@ def test_ragged_input(self): actual = dict(_infer_tile_ids_from_nested_list(input, ())) assert_combined_tile_ids_equal(expected, actual) + def test_uneven_length_input(self): + # Auto_combine won't work on ragged input + # but this is just to increase test coverage + ds = create_test_data + input = [[ds(0)], [ds(1), ds(2)]] + + expected = {(0, 0): ds(0), (1, 0): ds(1), (1, 1): ds(2)} + actual = dict(_infer_tile_ids_from_nested_list(input, ())) + print(actual) + assert_combined_tile_ids_equal(expected, actual) + def test_infer_from_datasets(self): ds = create_test_data input = [ds(0), ds(1)] @@ -533,10 +544,20 @@ def test_concat_twice(self, create_combined_ids): class 
TestCheckShapeTileIDs(object): # TODO test all types of ValueErrors from _check_shape_tile_id - def test_check_lengths(self): + def test_check_depths(self): ds = create_test_data(0) combined_tile_ids = {(0,): ds, (0, 1): ds} - with pytest.raises(ValueError): + with raises_regex(ValueError, 'sub-lists do not have ' + 'consistent depths'): + _check_shape_tile_ids(combined_tile_ids) + + def test_check_lengths(self): + ds = create_test_data(0) + combined_tile_ids = {(0, 0): ds, (0, 1): ds, + (1, 0): ds, (1, 1): ds, + (0, 1): ds} + with raises_regex(ValueError, 'sub-lists do not have ' + 'consistent lengths'): _check_shape_tile_ids(combined_tile_ids) @@ -556,6 +577,22 @@ def test_auto_combine_2d(self): assert_equal(result, expected) + @pytest.mark.skip def test_ragged_input(self): # TODO should throw an informative error if you try this ... + + def test_combine_redundant_nesting(self): + objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]] + actual = auto_combine(objs, concat_dims=[None, 'x']) + expected = Dataset({'x': [0, 1]}) + assert_identical(expected, actual) + + objs = [[Dataset({'x': [0]})], [Dataset({'x': [1]})]] + actual = auto_combine(objs, concat_dims=['x', None]) + expected = Dataset({'x': [0, 1]}) + assert_identical(expected, actual) + + @pytest.mark.skip + def test_mixed_default_concat_dims(self): + ... From 8ad01211609005e455e8f32149578c133ba06f74 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 24 Nov 2018 13:27:12 +0000 Subject: [PATCH 16/37] Now raises informative errors on invalid forms of input --- xarray/core/combine.py | 22 +++++----------------- xarray/tests/test_combine.py | 30 +++++++++++++++++------------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index c0a264f6335..c829d570124 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -2,6 +2,7 @@ import warnings import toolz.itertoolz as itertoolz +from collections import Counter import pandas as pd @@ -433,28 +434,15 @@ def _check_shape_tile_ids(combined_tile_ids, contains='datasets'): raise ValueError("The supplied objects do not form a hypercube because" " sub-lists do not have consistent depths") - # Check objects form a hypercube - # i.e. 
check all lists along one dimension are same length, monotonically- - # increasing with no repetitions + # Check all lists along one dimension are same length for dim in range(nesting_depths[0]): - try: - indices_along_dim = [id[dim] for id in tile_ids] - except IndexError: + indices_along_dim = [id[dim] for id in tile_ids] + occurrences = Counter(indices_along_dim) + if len(set(occurrences.values())) != 1: raise ValueError("The supplied objects do not form a hypercube " "because sub-lists do not have consistent " "lengths along dimension {}".format(str(dim))) - # TODO work out if this actually means something is wrong - if not set(indices_along_dim) == indices_along_dim: - raise ValueError("The supplied objects do not form a hypercube " - "because there are repeated concatenation " - "positions along concatenation dimension " - "{}".format(str(dim))) - - if not sorted(indices_along_dim) == indices_along_dim: - raise ValueError("The supplied objects have not been successfully " - "ordered along dimension {}".format(str(dim))) - def _data_vars(combined_id): id, ds = combined_id diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index e129ccb041f..3fa9650c2ca 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -406,6 +406,8 @@ def test_auto_combine_no_concat(self): assert_identical(expected, actual) +# TODO should we use @requires_dask? only for toolz? + class TestTileIDsFromNestedList(object): def test_1d(self): ds = create_test_data @@ -543,7 +545,6 @@ def test_concat_twice(self, create_combined_ids): class TestCheckShapeTileIDs(object): - # TODO test all types of ValueErrors from _check_shape_tile_id def test_check_depths(self): ds = create_test_data(0) combined_tile_ids = {(0,): ds, (0, 1): ds} @@ -553,9 +554,8 @@ def test_check_depths(self): def test_check_lengths(self): ds = create_test_data(0) - combined_tile_ids = {(0, 0): ds, (0, 1): ds, - (1, 0): ds, (1, 1): ds, - (0, 1): ds} + combined_tile_ids = {(0, 0): ds, (0, 1): ds , (0, 2): ds, + (1, 0): ds, (1, 1): ds} with raises_regex(ValueError, 'sub-lists do not have ' 'consistent lengths'): _check_shape_tile_ids(combined_tile_ids) @@ -577,12 +577,20 @@ def test_auto_combine_2d(self): assert_equal(result, expected) - @pytest.mark.skip - def test_ragged_input(self): - # TODO should throw an informative error if you try this - ... + def test_invalid_hypercube_input(self): + ds = create_test_data + + datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]] + with raises_regex(ValueError, 'sub-lists do not have ' + 'consistent lengths'): + auto_combine(datasets, concat_dims=['dim1', 'dim2']) - def test_combine_redundant_nesting(self): + datasets = [[ds(0), ds(1)], [[ds(3), ds(4)]]] + with raises_regex(ValueError, 'sub-lists do not have ' + 'consistent depths'): + auto_combine(datasets, concat_dims=['dim1', 'dim2']) + + def test_combine_concat_one_dim_merge_another(self): objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]] actual = auto_combine(objs, concat_dims=[None, 'x']) expected = Dataset({'x': [0, 1]}) @@ -592,7 +600,3 @@ def test_combine_redundant_nesting(self): actual = auto_combine(objs, concat_dims=['x', None]) expected = Dataset({'x': [0, 1]}) assert_identical(expected, actual) - - @pytest.mark.skip - def test_mixed_default_concat_dims(self): - ... 
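The reworked _check_shape_tile_ids above validates that the tile IDs form a hypercube using collections.Counter. A standalone sketch of the same two checks, assuming strings stand in for datasets:

    from collections import Counter

    def check_shape(combined_tile_ids):
        tile_ids = combined_tile_ids.keys()

        # All tile ids must be tuples of the same length, i.e. every
        # sub-list was nested to the same depth
        nesting_depths = [len(tile_id) for tile_id in tile_ids]
        if len(set(nesting_depths)) > 1:
            raise ValueError('sub-lists do not have consistent depths')

        # Along each dimension every index must occur equally often,
        # i.e. the tile ids fill out a complete hypercube
        for dim in range(nesting_depths[0]):
            occurrences = Counter(tile_id[dim] for tile_id in tile_ids)
            if len(set(occurrences.values())) != 1:
                raise ValueError('sub-lists do not have consistent lengths '
                                 'along dimension ' + str(dim))

    check_shape({(0, 0): 'a', (0, 1): 'b', (1, 0): 'c', (1, 1): 'd'})  # passes

    try:
        check_shape({(0, 0): 'a', (0, 1): 'b', (1, 0): 'c'})  # missing (1, 1)
    except ValueError as err:
        print(err)  # sub-lists do not have consistent lengths along dimension 0

Counting occurrences per dimension catches ragged inputs such as [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]] without having to reconstruct the nested structure.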
From 4b2c5443a637ce5ca1fe1f756921d0898b91cda4 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 25 Nov 2018 16:59:42 +0000 Subject: [PATCH 17/37] Refactoring to alos merge along each dimension --- xarray/backends/api.py | 8 ++- xarray/core/combine.py | 124 ++++++++++++++++++++++++----------- xarray/tests/test_combine.py | 19 +++++- 3 files changed, 107 insertions(+), 44 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 672e43e9e58..cc5aad2bfaf 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -601,12 +601,14 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, if not paths: raise IOError('no files to open') - # If infer_order_from_coords=True then this is uneccessary, but that's fine + # If infer_order_from_coords=True then this is unnecessary, but that's fine # as it should be quick - in this case it will just loop over one list # If infer_order_from_coords=False then this creates a flat list which is # easier to iterate over, while saving the originally-supplied structure - combined_ids_paths, concat_dims = _infer_concat_order_from_positions(paths, concat_dims) # Use an OrderedDict? - ids, paths = list(combined_ids_paths.keys()), list(combined_ids_paths.values()) # Is this in order?? + combined_ids_paths, concat_dims = _infer_concat_order_from_positions\ + (paths, concat_dims) + ids, paths = list(combined_ids_paths.keys()), \ + list(combined_ids_paths.values()) open_kwargs = dict(engine=engine, chunks=chunks or {}, lock=lock, autoclose=autoclose, **kwargs) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index c829d570124..e0e18c8149d 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -380,8 +380,11 @@ def _infer_concat_order_from_positions(datasets, concat_dims): # TODO might be faster in this case to just work out concat_dims once here tile_id, ds = list(combined_ids.items())[0] n_dims = len(tile_id) - if concat_dims is None or concat_dims == _CONCAT_DIM_DEFAULT: + if concat_dims == _CONCAT_DIM_DEFAULT: concat_dims = [_CONCAT_DIM_DEFAULT]*n_dims + elif concat_dims is None: + # TODO should this be a list of n Nones? + pass else: if len(concat_dims) != n_dims: raise ValueError("concat_dims has length " + str(len(concat_dims)) @@ -425,8 +428,6 @@ def _infer_tile_ids_from_nested_list(entry, current_pos): def _check_shape_tile_ids(combined_tile_ids, contains='datasets'): tile_ids = combined_tile_ids.keys() - # TODO cover all of these with separate unit tests - # Check all tuples are the same length # i.e. check that all lists are nested to the same depth nesting_depths = [len(id) for id in tile_ids] @@ -449,7 +450,7 @@ def _data_vars(combined_id): return tuple(sorted(ds.data_vars)) -def _combine_nd(combined_IDs, concat_dims, data_vars='all', +def _combine_nd(combined_ids, concat_dims, data_vars='all', coords='different', compat='no_conflicts'): """ Concatenates and merges an N-dimensional structure of datasets. @@ -459,7 +460,7 @@ def _combine_nd(combined_IDs, concat_dims, data_vars='all', Parameters ---------- - combined_IDs : Dict[Tuple[int, ...]], xarray.Dataset] + combined_ids : Dict[Tuple[int, ...]], xarray.Dataset] Structure containing all datasets to be concatenated with "tile_IDs" as keys, which specify position within the desired final combined result. 
concat_dims : sequence of str @@ -471,49 +472,94 @@ def _combine_nd(combined_IDs, concat_dims, data_vars='all', """ + # TODO merge, don't concat if concat_dim is None + + # TODO refactor this logic, possibly using method in np.blocks + # Perform N-D dimensional concatenation + # Each iteration of this loop reduces the length of the tile_IDs tuples + # by one. It always removes the first + for concat_dim in concat_dims: + dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim + + combined_ids = _auto_combine_along_first_dim(combined_ids, + dim=dim, + data_vars=data_vars, + coords=coords, + compat=compat) + combined_ds = next(iter(combined_ids.values())) + #print(combined_ds) + return combined_ds + + +def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars, + coords, compat): # Organise by data variables grouped_by_data_vars = itertoolz.groupby(_data_vars, - combined_IDs.items()).values() + combined_ids.items()).values() - concatenated_datasets = [] + new_combined_ids_all_vars = [] for tiled_datasets_group in grouped_by_data_vars: - # Convert list of tuples back into a dictionary - concatenated_ids = dict(tiled_datasets_group) + # Groupby returns list of tuples - convert back into a dictionary + combined_ids = dict(tiled_datasets_group) - # TODO refactor this logic, possibly using method in np.blocks - # Perform N-D dimensional concatenation - # Each iteration of this loop reduces the length of the tile_IDs tuples - # by one. It always removes the first - for concat_dim in concat_dims: - dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim + # Group into lines of datasets which must be combined along dim + grouped = itertoolz.groupby(_new_tile_id, combined_ids.items()) - concatenated_ids = _concat_along_first_dim(concatenated_ids, - dim=dim, - data_vars=data_vars, - coords=coords) - concatenated_datasets = concatenated_datasets \ - + list(concatenated_ids.values()) - return merge(concatenated_datasets, compat=compat) + new_combined_ids = {} + for new_id, group in grouped.items(): + new_combined_ids[new_id] = _auto_combine_1d(group, dim, data_vars, + coords, compat) + print("New combined IDs:") + print(new_combined_ids) + new_combined_ids_all_vars.append(new_combined_ids) -def _new_tile_id(single_id_ds_pair): - # TODO maybe replace with something like lambda x: x[0][1:]? 
- tile_id, ds = single_id_ds_pair - return tile_id[1:] + # merge the new_combined_ids dicts by using xarray.merge on elements with the same key + return _merge_by_new_ids(new_combined_ids_all_vars, compat=compat) + + +def _auto_combine_1d(to_combine, dim, data_vars, coords, compat): + if dim is not None: + combined = _auto_concat(to_combine, dim=dim, data_vars=data_vars, + coords=coords) + else: + print(to_combine) + combined = merge(to_combine, compat=compat) + + return combined + + +def _merge_by_new_ids(new_combined_ids_all_vars, compat): + # Merge different variables back together + # Merging a list of dicts + # Group by indexes + grouped_by_new_id = itertoolz.groupby(_tile_id, new_combined_ids_all_vars) + + print("Grouped by new ID:") + print(grouped_by_new_id) + + # Merge different variables back together, while retaining new tile IDs + merged_new_combined_ids = {} + for tile_id, group in grouped_by_new_id.items(): + + print(group) + to_merge = [list(combined_ds.values())[0] for combined_ds in group] + print(to_merge) + merged = merge(to_merge, compat=compat) + merged_new_combined_ids[tile_id] = merged + return merged_new_combined_ids -def _concat_along_first_dim(combined_IDs, dim, data_vars='all', - coords='different'): - grouped = itertoolz.groupby(_new_tile_id, combined_IDs.items()) - new_combined_IDs = {} - for new_ID, group in grouped.items(): - to_concat = [ds for old_ID, ds in group] - new_combined_IDs[new_ID] = _auto_concat(to_concat, dim=dim, - data_vars=data_vars, - coords=coords) - return new_combined_IDs +def _tile_id(new_combined_ids): + return next(iter(new_combined_ids.keys())) + + +def _new_tile_id(single_id_ds_pair): + # TODO maybe replace with something like lambda x: x[0][1:]? + tile_id, ds = single_id_ds_pair + return tile_id[1:] def _auto_combine(datasets, concat_dims, compat, data_vars, coords, @@ -529,7 +575,7 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords, raise NotImplementedError # TODO Use coordinates to determine tile_ID for each dataset in N-D # Ignore how they were ordered previously - # Shoould look like + # Should look like # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, # concat_dims) else: @@ -546,11 +592,13 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords, # Check that the combined_ids are sensible _check_shape_tile_ids(combined_ids) + print(concat_dims) + # Repeatedly concatenate then merge along each dimension combined = _combine_nd(combined_ids, concat_dims, compat=compat, data_vars=data_vars, coords=coords) else: - # Case of no concatenation wanted + # Case of no concatenation wanted at all concatenated = datasets combined = merge(concatenated, compat=compat) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 3fa9650c2ca..dbd2c070251 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -10,7 +10,7 @@ from xarray import DataArray, Dataset, Variable, auto_combine, concat from xarray.core.pycompat import OrderedDict, iteritems from xarray.core.combine import ( - _new_tile_id, _concat_along_first_dim, + _new_tile_id, _auto_combine_along_first_dim, _infer_concat_order_from_positions, _infer_tile_ids_from_nested_list, _check_shape_tile_ids, _combine_nd) @@ -386,6 +386,13 @@ def test_auto_combine_no_concat(self): actual = auto_combine([data, data, data], concat_dims=None) assert_identical(data, actual) + tmp1 = Dataset({'x': 0}) + tmp2 = Dataset({'x': np.nan}) + actual = auto_combine([tmp1, tmp2], concat_dims=None) + 
assert_identical(tmp1, actual) + actual = auto_combine([tmp1, tmp2], concat_dims=[None]) + assert_identical(tmp1, actual) + # Single object, with a concat_dim explicitly provided # Test the issue reported in GH #1988 objs = [Dataset({'x': 0, 'y': 1})] @@ -520,12 +527,18 @@ def test_get_new_tile_ids(self, create_combined_ids): expected_new_tile_id = tile_id[1:] assert _new_tile_id(combined) == expected_new_tile_id + def test_merge_by_new_ids(self): + ... + @pytest.mark.parametrize("concat_dim", ['dim1', 'new_dim']) def test_concat_once(self, create_combined_ids, concat_dim): shape = (2,) combined_ids = create_combined_ids(shape) ds = create_test_data - result = _concat_along_first_dim(combined_ids, dim=concat_dim) + result = _auto_combine_along_first_dim(combined_ids, dim=concat_dim, + data_vars='all', + coords='different', + compat='no_conflicts') expected_ds = concat([ds(0), ds(1)], dim=concat_dim) assert_combined_tile_ids_equal(result, {(): expected_ds}) @@ -590,7 +603,7 @@ def test_invalid_hypercube_input(self): 'consistent depths'): auto_combine(datasets, concat_dims=['dim1', 'dim2']) - def test_combine_concat_one_dim_merge_another(self): + def test_combine_concat_over_redundant_nesting(self): objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]] actual = auto_combine(objs, concat_dims=[None, 'x']) expected = Dataset({'x': [0, 1]}) From 3d0061e382dc51badc1f731eb53df8f42df9a7c1 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 25 Nov 2018 17:57:05 +0000 Subject: [PATCH 18/37] Refactored to literally just apply the old auto_combine along each dimension --- xarray/core/combine.py | 116 +++++++++++------------------------ xarray/tests/test_combine.py | 51 ++++++++------- 2 files changed, 64 insertions(+), 103 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index e0e18c8149d..b476e77e850 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -380,11 +380,8 @@ def _infer_concat_order_from_positions(datasets, concat_dims): # TODO might be faster in this case to just work out concat_dims once here tile_id, ds = list(combined_ids.items())[0] n_dims = len(tile_id) - if concat_dims == _CONCAT_DIM_DEFAULT: - concat_dims = [_CONCAT_DIM_DEFAULT]*n_dims - elif concat_dims is None: - # TODO should this be a list of n Nones? - pass + if concat_dims == _CONCAT_DIM_DEFAULT or None: + concat_dims = [concat_dims]*n_dims else: if len(concat_dims) != n_dims: raise ValueError("concat_dims has length " + str(len(concat_dims)) @@ -428,6 +425,8 @@ def _infer_tile_ids_from_nested_list(entry, current_pos): def _check_shape_tile_ids(combined_tile_ids, contains='datasets'): tile_ids = combined_tile_ids.keys() + # TODO a check that only the expected types of objects are contained + # Check all tuples are the same length # i.e. check that all lists are nested to the same depth nesting_depths = [len(id) for id in tile_ids] @@ -472,93 +471,54 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', """ - # TODO merge, don't concat if concat_dim is None - # TODO refactor this logic, possibly using method in np.blocks # Perform N-D dimensional concatenation # Each iteration of this loop reduces the length of the tile_IDs tuples # by one. 
It always removes the first for concat_dim in concat_dims: - dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - - combined_ids = _auto_combine_along_first_dim(combined_ids, - dim=dim, - data_vars=data_vars, - coords=coords, - compat=compat) + combined_ids = _auto_combine_all_along_first_dim(combined_ids, + dim=concat_dim, + data_vars=data_vars, + coords=coords, + compat=compat) combined_ds = next(iter(combined_ids.values())) - #print(combined_ds) return combined_ds def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat): - # Organise by data variables - grouped_by_data_vars = itertoolz.groupby(_data_vars, - combined_ids.items()).values() - - new_combined_ids_all_vars = [] - for tiled_datasets_group in grouped_by_data_vars: - - # Groupby returns list of tuples - convert back into a dictionary - combined_ids = dict(tiled_datasets_group) + # Group into lines of datasets which must be combined along dim + grouped = itertoolz.groupby(_new_tile_id, combined_ids.items()) - # Group into lines of datasets which must be combined along dim - grouped = itertoolz.groupby(_new_tile_id, combined_ids.items()) + new_combined_ids = {} + for new_id, group in grouped.items(): + combined_ids = dict(group) + datasets = list(combined_ids.values()) + new_combined_ids[new_id] = _auto_combine_1d(datasets, dim, compat, + data_vars, coords) - new_combined_ids = {} - for new_id, group in grouped.items(): - new_combined_ids[new_id] = _auto_combine_1d(group, dim, data_vars, - coords, compat) + return new_combined_ids - print("New combined IDs:") - print(new_combined_ids) - new_combined_ids_all_vars.append(new_combined_ids) - # merge the new_combined_ids dicts by using xarray.merge on elements with the same key - return _merge_by_new_ids(new_combined_ids_all_vars, compat=compat) - - -def _auto_combine_1d(to_combine, dim, data_vars, coords, compat): - if dim is not None: - combined = _auto_concat(to_combine, dim=dim, data_vars=data_vars, - coords=coords) +def _auto_combine_1d(datasets, concat_dim=_CONCAT_DIM_DEFAULT, + compat='no_conflicts', + data_vars='all', coords='different'): + # This is just the old auto_combine function (which only worked along 1D) + if concat_dim is not None: + dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim + grouped = itertoolz.groupby(lambda ds: tuple(sorted(ds.data_vars)), + datasets).values() + concatenated = [_auto_concat(ds, dim=dim, + data_vars=data_vars, coords=coords) + for ds in grouped] else: - print(to_combine) - combined = merge(to_combine, compat=compat) - - return combined - - -def _merge_by_new_ids(new_combined_ids_all_vars, compat): - # Merge different variables back together - # Merging a list of dicts - # Group by indexes - grouped_by_new_id = itertoolz.groupby(_tile_id, new_combined_ids_all_vars) - - print("Grouped by new ID:") - print(grouped_by_new_id) - - # Merge different variables back together, while retaining new tile IDs - merged_new_combined_ids = {} - for tile_id, group in grouped_by_new_id.items(): - - print(group) - to_merge = [list(combined_ds.values())[0] for combined_ds in group] - print(to_merge) - merged = merge(to_merge, compat=compat) - merged_new_combined_ids[tile_id] = merged - - return merged_new_combined_ids - - -def _tile_id(new_combined_ids): - return next(iter(new_combined_ids.keys())) + concatenated = datasets + merged = merge(concatenated, compat=compat) + return merged def _new_tile_id(single_id_ds_pair): - # TODO maybe replace with something like lambda x: x[0][1:]? 
- tile_id, ds = single_id_ds_pair + tile_id, ds = single_id_ds_pair return tile_id[1:] @@ -589,19 +549,15 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords, # Already sorted so just use the ids already passed combined_ids = dict(zip(ids, datasets)) - # Check that the combined_ids are sensible + # Check that the inferred shape is combinable _check_shape_tile_ids(combined_ids) - print(concat_dims) - # Repeatedly concatenate then merge along each dimension combined = _combine_nd(combined_ids, concat_dims, compat=compat, data_vars=data_vars, coords=coords) else: # Case of no concatenation wanted at all - concatenated = datasets - combined = merge(concatenated, compat=compat) - + combined = merge(datasets, compat=compat) return combined @@ -663,8 +619,6 @@ def auto_combine(datasets, Dataset.merge """ - # TODO do some of _calc_concat_dim_coord's checks on concat_dims here? - # The IDs argument tells _auto_combine that the datasets are not yet sorted return _auto_combine(datasets, concat_dims=concat_dims, compat=compat, data_vars=data_vars, coords=coords, diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index dbd2c070251..8abe0ac0f5d 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -10,9 +10,9 @@ from xarray import DataArray, Dataset, Variable, auto_combine, concat from xarray.core.pycompat import OrderedDict, iteritems from xarray.core.combine import ( - _new_tile_id, _auto_combine_along_first_dim, + _new_tile_id, _auto_combine_all_along_first_dim, _infer_concat_order_from_positions, _infer_tile_ids_from_nested_list, - _check_shape_tile_ids, _combine_nd) + _check_shape_tile_ids, _combine_nd, _auto_combine_1d) from . import ( InaccessibleArray, assert_array_equal, assert_equal, assert_identical, @@ -302,42 +302,41 @@ def test_concat_lazy(self): class TestAutoCombine(object): + @pytest.mark.parametrize("combine", [_auto_combine_1d, auto_combine]) @requires_dask # only for toolz - def test_auto_combine(self): + def test_auto_combine(self, combine): objs = [Dataset({'x': [0]}), Dataset({'x': [1]})] - actual = auto_combine(objs) + actual = combine(objs) expected = Dataset({'x': [0, 1]}) assert_identical(expected, actual) - actual = auto_combine(actual) - assert_identical(expected, actual) - - actual = auto_combine([actual]) + actual = combine([actual]) assert_identical(expected, actual) objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})] - actual = auto_combine(objs) + actual = combine(objs) expected = Dataset({'x': [0, 1, 2]}) assert_identical(expected, actual) + # TODO find out why this fails!! 
        # ensure auto_combine handles non-sorted variables
        objs = [Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])),
                Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))]
-        actual = auto_combine(objs)
+        actual = combine(objs)
        expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1])})
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
        with raises_regex(ValueError, 'too many .* dimensions'):
-            auto_combine(objs)
+            combine(objs)

        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
        with raises_regex(ValueError, 'cannot infer dimension'):
-            auto_combine(objs)
+            combine(objs)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
        with pytest.raises(KeyError):
-            auto_combine(objs)
+            combine(objs)

     @requires_dask  # only for toolz
     def test_auto_combine_previously_failed(self):

From 60c93ba59fd6636a9f6ebe2bb460b9924d9ebec7 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Mon, 26 Nov 2018 11:28:23 +0000
Subject: [PATCH 19/37] Added unit tests for open_mfdataset

---
 xarray/backends/api.py        | 13 ++++---
 xarray/core/combine.py        |  8 ++---
 xarray/tests/test_backends.py | 65 +++++++++++++++++++++++++++++++++++
 xarray/tests/test_combine.py  | 10 +++++-
 4 files changed, 84 insertions(+), 12 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index cc5aad2bfaf..45ceb814a18 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -503,12 +503,12 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT,
     By default, chunks will be chosen to load entire input files into
     memory at once. This has a major impact on performance: please see the
     full documentation for more details [2].
- concat_dims : None, str, DataArray or Index, optional - Dimension to concatenate files along. This argument is passed on to + concat_dims : list of str, DataArray, Index or None, optional + Dimensions to concatenate files along. This argument is passed on to :py:func:`xarray.auto_combine` along with the dataset objects. You only - need to provide this argument if the dimension along which you want to - concatenate is not a dimension in the original datasets, e.g., if you - want to stack a collection of 2D arrays along a third dimension. + need to provide this argument if any of the dimensions along which you + want to concatenate is not a dimension in the original datasets, e.g., + if you want to stack a collection of 2D arrays along a third dimension. By default, xarray attempts to infer this argument by examining component files. Set ``concat_dim=None`` explicitly to disable concatenation. @@ -601,8 +601,7 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, if not paths: raise IOError('no files to open') - # If infer_order_from_coords=True then this is unnecessary, but that's fine - # as it should be quick - in this case it will just loop over one list + # If infer_order_from_coords=True then this is unnecessary, but quick. # If infer_order_from_coords=False then this creates a flat list which is # easier to iterate over, while saving the originally-supplied structure combined_ids_paths, concat_dims = _infer_concat_order_from_positions\ diff --git a/xarray/core/combine.py b/xarray/core/combine.py index b476e77e850..bf44997e9f6 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -380,7 +380,7 @@ def _infer_concat_order_from_positions(datasets, concat_dims): # TODO might be faster in this case to just work out concat_dims once here tile_id, ds = list(combined_ids.items())[0] n_dims = len(tile_id) - if concat_dims == _CONCAT_DIM_DEFAULT or None: + if concat_dims == _CONCAT_DIM_DEFAULT or concat_dims == None: concat_dims = [concat_dims]*n_dims else: if len(concat_dims) != n_dims: @@ -468,7 +468,7 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', Returns ------- - + combined_ds : xarray.Dataset """ # TODO refactor this logic, possibly using method in np.blocks @@ -481,7 +481,7 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', data_vars=data_vars, coords=coords, compat=compat) - combined_ds = next(iter(combined_ids.values())) + combined_ds = list(combined_ids.values())[0] return combined_ds @@ -576,7 +576,7 @@ def auto_combine(datasets, ---------- datasets : sequence of xarray.Dataset Dataset objects to merge. - concat_dims : list of str or DataArray or Index, optional + concat_dims : list of str, DataArray, Index or None, optional Dimensions along which to concatenate variables, as used by :py:func:`xarray.concat`. 
You only need to provide this argument if any of the dimensions along which you want to concatenate are not a diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d9109c4b67f..42893cf5d9e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2135,12 +2135,42 @@ def test_open_mfdataset(self): assert actual.foo.variable.data.chunks == \ ((3, 2, 3, 2),) + with raises_regex(IOError, 'no files to open'): open_mfdataset('foo-bar-baz-*.nc') with raises_regex(ValueError, 'wild-card'): open_mfdataset('http://some/remote/uri') + def test_open_mfdataset_2d(self): + original = Dataset({'foo': (['x', 'y'], np.random.randn(10, 8))}) + with create_tmp_file() as tmp1: + with create_tmp_file() as tmp2: + with create_tmp_file() as tmp3: + with create_tmp_file() as tmp4: + original.isel(x=slice(5), + y=slice(4)).to_netcdf(tmp1) + original.isel(x=slice(5, 10), + y=slice(4)).to_netcdf(tmp2) + original.isel(x=slice(5), + y=slice(4, 8)).to_netcdf(tmp3) + original.isel(x=slice(5, 10), + y=slice(4, 8)).to_netcdf(tmp4) + with open_mfdataset([[tmp1, tmp2], + [tmp3, tmp4]], + concat_dims=['y', 'x']) as actual: + assert isinstance(actual.foo.variable.data, + da.Array) + assert actual.foo.variable.data.chunks == \ + ((5, 5), (4, 4)) + assert_identical(original, actual) + with open_mfdataset([[tmp1, tmp2], + [tmp3, tmp4]], + concat_dims=['y', 'x'], + chunks={'x': 3, 'y': 2}) as actual: + assert actual.foo.variable.data.chunks == \ + ((3, 2, 3, 2), (2, 2, 2, 2),) + @requires_pathlib def test_open_mfdataset_pathlib(self): original = Dataset({'foo': ('x', np.random.randn(10))}) @@ -2153,6 +2183,41 @@ def test_open_mfdataset_pathlib(self): with open_mfdataset([tmp1, tmp2]) as actual: assert_identical(original, actual) + @requires_pathlib + def test_open_mfdataset_2d_pathlib(self): + original = Dataset({'foo': (['x', 'y'], np.random.randn(10, 8))}) + with create_tmp_file() as tmp1: + with create_tmp_file() as tmp2: + with create_tmp_file() as tmp3: + with create_tmp_file() as tmp4: + tmp1 = Path(tmp1) + tmp2 = Path(tmp2) + tmp3 = Path(tmp3) + tmp4 = Path(tmp4) + original.isel(x=slice(5), + y=slice(4)).to_netcdf(tmp1) + original.isel(x=slice(5, 10), + y=slice(4)).to_netcdf(tmp2) + original.isel(x=slice(5), + y=slice(4, 8)).to_netcdf(tmp3) + original.isel(x=slice(5, 10), + y=slice(4, 8)).to_netcdf(tmp4) + with open_mfdataset([[tmp1, tmp2], + [tmp3, tmp4]], + concat_dims=['y', 'x']) as actual: + assert_identical(original, actual) + + @pytest.mark.xfail(reason="Not yet implemented") + def test_open_mfdataset(self): + original = Dataset({'foo': ('x', np.random.randn(10))}) + with create_tmp_file() as tmp1: + with create_tmp_file() as tmp2: + original.isel(x=slice(5)).to_netcdf(tmp1) + original.isel(x=slice(5, 10)).to_netcdf(tmp2) + with open_mfdataset([tmp1, tmp2], + infer_order_from_coords=True) as actual: + assert_identical(original, actual) + def test_attrs_mfdataset(self): original = Dataset({'foo': ('x', np.random.randn(10))}) with create_tmp_file() as tmp1: diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 8abe0ac0f5d..0edc715698a 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -318,7 +318,6 @@ def test_auto_combine(self, combine): expected = Dataset({'x': [0, 1, 2]}) assert_identical(expected, actual) - # TODO find out why this fails!! 
# ensure auto_combine handles non-sorted variables objs = [Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])), Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))] @@ -411,6 +410,15 @@ def test_auto_combine_no_concat(self): {'baz': [100]}) assert_identical(expected, actual) + @pytest.mark.xfail(reason="Not yet implemented") + def test_infer_order_from_coords(self): + data = create_test_data() + print(data) + objs = [data.isel(x=slice(4, 9)), data.isel(x=slice(4))] + actual = auto_combine(objs, infer_order_from_coords=True) + expected = data + assert_identical(expected, actual) + class TestTileIDsFromNestedList(object): def test_1d(self): From 18245384c12d77177c0c230b7826739ca5089234 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 26 Nov 2018 13:31:51 +0000 Subject: [PATCH 20/37] Removed TODOs --- xarray/core/combine.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index bf44997e9f6..2d5da1053fb 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -375,9 +375,6 @@ def _infer_concat_order_from_positions(datasets, concat_dims): combined_ids = dict(_infer_tile_ids_from_nested_list(datasets, ())) - # Currently if concat_dims is not supplied then _auto_concat attempts to - # deduce it on every call - # TODO might be faster in this case to just work out concat_dims once here tile_id, ds = list(combined_ids.items())[0] n_dims = len(tile_id) if concat_dims == _CONCAT_DIM_DEFAULT or concat_dims == None: @@ -422,11 +419,9 @@ def _infer_tile_ids_from_nested_list(entry, current_pos): yield current_pos, entry -def _check_shape_tile_ids(combined_tile_ids, contains='datasets'): +def _check_shape_tile_ids(combined_tile_ids): tile_ids = combined_tile_ids.keys() - # TODO a check that only the expected types of objects are contained - # Check all tuples are the same length # i.e. 
check that all lists are nested to the same depth nesting_depths = [len(id) for id in tile_ids] From d380815de26510fd977269e35b8b9648db279769 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Nov 2018 18:58:02 +0000 Subject: [PATCH 21/37] Removed format strings --- xarray/core/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 2d5da1053fb..d81849e037b 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -436,7 +436,7 @@ def _check_shape_tile_ids(combined_tile_ids): if len(set(occurrences.values())) != 1: raise ValueError("The supplied objects do not form a hypercube " "because sub-lists do not have consistent " - "lengths along dimension {}".format(str(dim))) + "lengths along dimension" + str(dim)) def _data_vars(combined_id): From c4bb8d0396498154da3521ffe7c9a81a4199cf7b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Nov 2018 20:07:54 +0000 Subject: [PATCH 22/37] test_get_new_tile_ids now doesn't assume dicts are ordered --- xarray/tests/test_combine.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 0edc715698a..b4c3c36f46a 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -7,7 +7,7 @@ import pandas as pd import pytest -from xarray import DataArray, Dataset, Variable, auto_combine, concat +from xarray import DataArray, Dataset, Variable, auto_combine, concat, merge from xarray.core.pycompat import OrderedDict, iteritems from xarray.core.combine import ( _new_tile_id, _auto_combine_all_along_first_dim, @@ -529,9 +529,9 @@ def test_get_new_tile_ids(self, create_combined_ids): shape = (1, 2, 3) combined_ids = create_combined_ids(shape) - for combined, tile_id in zip(combined_ids.items(), _create_tile_ids(shape)): - expected_new_tile_id = tile_id[1:] - assert _new_tile_id(combined) == expected_new_tile_id + expected_tile_ids = sorted(combined_ids.keys()) + actual_tile_ids = _create_tile_ids(shape) + assert expected_tile_ids == actual_tile_ids @pytest.mark.parametrize("concat_dim", ['dim1', 'new_dim']) def test_concat_once(self, create_combined_ids, concat_dim): @@ -547,16 +547,36 @@ def test_concat_once(self, create_combined_ids, concat_dim): expected_ds = concat([ds(0), ds(1)], dim=concat_dim) assert_combined_tile_ids_equal(result, {(): expected_ds}) - def test_concat_twice(self, create_combined_ids): + def test_concat_only_first_dim(self, create_combined_ids): shape = (2, 3) combined_ids = create_combined_ids(shape) - result = _combine_nd(combined_ids, concat_dims=['dim1', 'dim2']) + ds = create_test_data + result = _auto_combine_all_along_first_dim(combined_ids, + dim='dim1', + data_vars='all', + coords='different', + compat='no_conflicts') ds = create_test_data partway1 = concat([ds(0), ds(3)], dim='dim1') partway2 = concat([ds(1), ds(4)], dim='dim1') partway3 = concat([ds(2), ds(5)], dim='dim1') - expected = concat([partway1, partway2, partway3], dim='dim2') + expected_datasets = [partway1, partway2, partway3] + expected = {(i,): ds for i, ds in enumerate(expected_datasets)} + + assert_combined_tile_ids_equal(result, expected) + + @pytest.mark.parametrize("concat_dim", ['dim1', 'new_dim']) + def test_concat_twice(self, create_combined_ids, concat_dim): + shape = (2, 3) + combined_ids = create_combined_ids(shape) + result = _combine_nd(combined_ids, concat_dims=['dim1', concat_dim]) + + ds = create_test_data + partway1 = 
concat([ds(0), ds(3)], dim='dim1') + partway2 = concat([ds(1), ds(4)], dim='dim1') + partway3 = concat([ds(2), ds(5)], dim='dim1') + expected = concat([partway1, partway2, partway3], dim=concat_dim) assert_equal(result, expected) From 6b7f8891c9e3e113b511fded3e6b7d2801e9873e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Nov 2018 21:08:42 +0000 Subject: [PATCH 23/37] Fixed failing tests on python3.5 caused by accidentally assuming dict was ordered --- xarray/core/combine.py | 7 ++++--- xarray/tests/test_combine.py | 38 ++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index d81849e037b..aaef8cd4162 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -2,7 +2,7 @@ import warnings import toolz.itertoolz as itertoolz -from collections import Counter +from collections import Counter, OrderedDict import pandas as pd @@ -470,6 +470,7 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', # Perform N-D dimensional concatenation # Each iteration of this loop reduces the length of the tile_IDs tuples # by one. It always removes the first + for concat_dim in concat_dims: combined_ids = _auto_combine_all_along_first_dim(combined_ids, dim=concat_dim, @@ -487,11 +488,11 @@ def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars, new_combined_ids = {} for new_id, group in grouped.items(): - combined_ids = dict(group) + # TODO is there a way to unpack this object without using OrderedDict? + combined_ids = OrderedDict(sorted(group)) datasets = list(combined_ids.values()) new_combined_ids[new_id] = _auto_combine_1d(datasets, dim, compat, data_vars, coords) - return new_combined_ids diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index b4c3c36f46a..d44ba20ba1c 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -12,7 +12,7 @@ from xarray.core.combine import ( _new_tile_id, _auto_combine_all_along_first_dim, _infer_concat_order_from_positions, _infer_tile_ids_from_nested_list, - _check_shape_tile_ids, _combine_nd, _auto_combine_1d) + _check_shape_tile_ids, _combine_nd, _auto_combine_1d, _auto_combine) from . 
import ( InaccessibleArray, assert_array_equal, assert_equal, assert_identical, @@ -410,15 +410,6 @@ def test_auto_combine_no_concat(self): {'baz': [100]}) assert_identical(expected, actual) - @pytest.mark.xfail(reason="Not yet implemented") - def test_infer_order_from_coords(self): - data = create_test_data() - print(data) - objs = [data.isel(x=slice(4, 9)), data.isel(x=slice(4))] - actual = auto_combine(objs, infer_order_from_coords=True) - expected = data - assert_identical(expected, actual) - class TestTileIDsFromNestedList(object): def test_1d(self): @@ -515,7 +506,8 @@ def create_combined_ids(): def _create_combined_ids(shape): tile_ids = _create_tile_ids(shape) nums = range(len(tile_ids)) - return {tile_id: create_test_data(num) for tile_id, num in zip(tile_ids, nums)} + return {tile_id: create_test_data(num) + for tile_id, num in zip(tile_ids, nums)} def _create_tile_ids(shape): @@ -551,6 +543,7 @@ def test_concat_only_first_dim(self, create_combined_ids): shape = (2, 3) combined_ids = create_combined_ids(shape) ds = create_test_data + print(combined_ids) result = _auto_combine_all_along_first_dim(combined_ids, dim='dim1', data_vars='all', @@ -563,7 +556,7 @@ def test_concat_only_first_dim(self, create_combined_ids): partway3 = concat([ds(2), ds(5)], dim='dim1') expected_datasets = [partway1, partway2, partway3] expected = {(i,): ds for i, ds in enumerate(expected_datasets)} - + assert_combined_tile_ids_equal(result, expected) @pytest.mark.parametrize("concat_dim", ['dim1', 'new_dim']) @@ -648,3 +641,24 @@ def test_combine_concat_over_redundant_nesting(self): actual = auto_combine(objs, concat_dims=['x', None]) expected = Dataset({'x': [0, 1]}) assert_identical(expected, actual) + + +# TODO test for _new_tile_id + +class TestAutoCombineUsingCoords(object): + @pytest.mark.xfail(reason="Not yet implemented") + def test_infer_order_from_coords(self): + # Should pass once inferring order from coords is implemented + data = create_test_data() + objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] + actual = auto_combine(objs, infer_order_from_coords=True) + expected = data + assert_identical(expected, actual) + + def test_order_inferred_from_coords(self): + data = create_test_data() + objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] + with pytest.raises(NotImplementedError): + _auto_combine(objs, concat_dims=['dim2'],compat='no_conflicts', + data_vars='all', coords='different', + infer_order_from_coords=True, ids=True) From 58a3648cc2b4a7465f5cacdaa8e6eb67e54ff583 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Nov 2018 21:21:23 +0000 Subject: [PATCH 24/37] Test for getting new tile id --- xarray/tests/test_combine.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index d44ba20ba1c..87d4efb57f5 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -517,6 +517,15 @@ def _create_tile_ids(shape): @requires_dask # only for toolz class TestCombineND(object): + @pytest.mark.parametrize("old_id, new_id", [((3,0,1), (0,1)), + ((0, 0), (0,)), + ((1,), ()), + ((0,), ()), + ((1, 0), (0,))]) + def test_new_tile_id(self, old_id, new_id): + ds = create_test_data + assert _new_tile_id((old_id, ds)) == new_id + def test_get_new_tile_ids(self, create_combined_ids): shape = (1, 2, 3) combined_ids = create_combined_ids(shape) @@ -643,8 +652,6 @@ def test_combine_concat_over_redundant_nesting(self): assert_identical(expected, actual) -# TODO test for 
_new_tile_id - class TestAutoCombineUsingCoords(object): @pytest.mark.xfail(reason="Not yet implemented") def test_infer_order_from_coords(self): From a12a34a9be51fb16cfebcf13ad0ccb0ef26a4edd Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Nov 2018 21:51:38 +0000 Subject: [PATCH 25/37] Fixed itertoolz import so that it's compatible with older versions --- xarray/core/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index aaef8cd4162..274659b2d61 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, division, print_function import warnings -import toolz.itertoolz as itertoolz +from toolz import itertoolz from collections import Counter, OrderedDict import pandas as pd From ada1f4a4ef501acb571c1724a84f3da2e9b05ac2 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 1 Dec 2018 13:43:54 +0000 Subject: [PATCH 26/37] Increased test coverage --- xarray/core/combine.py | 7 +------ xarray/tests/test_backends.py | 8 ++++++-- xarray/tests/test_combine.py | 34 +++++++++++++++++++++++++--------- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 274659b2d61..cf832b1a9d2 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -2,7 +2,7 @@ import warnings from toolz import itertoolz -from collections import Counter, OrderedDict +from collections import Counter import pandas as pd @@ -439,11 +439,6 @@ def _check_shape_tile_ids(combined_tile_ids): "lengths along dimension" + str(dim)) -def _data_vars(combined_id): - id, ds = combined_id - return tuple(sorted(ds.data_vars)) - - def _combine_nd(combined_ids, concat_dims, data_vars='all', coords='different', compat='no_conflicts'): """ diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 42893cf5d9e..2dc950b9234 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2214,8 +2214,12 @@ def test_open_mfdataset(self): with create_tmp_file() as tmp2: original.isel(x=slice(5)).to_netcdf(tmp1) original.isel(x=slice(5, 10)).to_netcdf(tmp2) - with open_mfdataset([tmp1, tmp2], - infer_order_from_coords=True) as actual: + + with pytest.raises(NotImplementedError): + open_mfdataset([tmp1, tmp2], infer_order_from_coords=True) + + # With infer_order_from_coords=True this should pass in future + with open_mfdataset([tmp1, tmp2]) as actual: assert_identical(original, actual) def test_attrs_mfdataset(self): diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 87d4efb57f5..9258dc0b5ee 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -497,6 +497,10 @@ def test_infer_from_datasets(self): (input, ['dim1']) assert_combined_tile_ids_equal(expected, actual) + input = [ds(0), ds(1)] + with pytest.raises(ValueError): + _infer_concat_order_from_positions(input, ['dim1', 'extra_dim']) + @pytest.fixture(scope='module') def create_combined_ids(): @@ -602,10 +606,7 @@ def test_check_lengths(self): @requires_dask # only for toolz class TestAutoCombineND(object): - # TODO there should be a lot more tests in here testing different cases - def test_single_dataset(self): - objs = [Dataset({'x': [0]}), Dataset({'x': [1]})] actual = auto_combine(objs) expected = Dataset({'x': [0, 1]}) @@ -640,6 +641,10 @@ def test_invalid_hypercube_input(self): 'consistent depths'): auto_combine(datasets, concat_dims=['dim1', 'dim2']) + datasets = [[ds(0), ds(1)], 
[ds(3), ds(4)]] + with raises_regex(ValueError, 'concat_dims has length'): + auto_combine(datasets, concat_dims=['dim1']) + def test_combine_concat_over_redundant_nesting(self): objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]] actual = auto_combine(objs, concat_dims=[None, 'x']) @@ -651,16 +656,18 @@ def test_combine_concat_over_redundant_nesting(self): expected = Dataset({'x': [0, 1]}) assert_identical(expected, actual) + objs = [[Dataset({'x': [0]})]] + actual = auto_combine(objs, concat_dims=[None, None]) + expected = Dataset({'x': [0]}) + assert_identical(expected, actual) + class TestAutoCombineUsingCoords(object): - @pytest.mark.xfail(reason="Not yet implemented") - def test_infer_order_from_coords(self): - # Should pass once inferring order from coords is implemented + def test_infer_order_from_coords_not_implemented(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] - actual = auto_combine(objs, infer_order_from_coords=True) - expected = data - assert_identical(expected, actual) + with pytest.raises(NotImplementedError): + auto_combine(objs, infer_order_from_coords=True) def test_order_inferred_from_coords(self): data = create_test_data() @@ -669,3 +676,12 @@ def test_order_inferred_from_coords(self): _auto_combine(objs, concat_dims=['dim2'],compat='no_conflicts', data_vars='all', coords='different', infer_order_from_coords=True, ids=True) + + @pytest.mark.xfail(reason="Not yet implemented") + def test_infer_order_from_coords(self): + # Should pass once inferring order from coords is implemented + data = create_test_data() + objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] + actual = auto_combine(objs) # but with infer_order_from_coords=True + expected = data + assert_identical(expected, actual) From ef0a30e789521e03a7f5322f488be1dafc208481 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 1 Dec 2018 15:15:11 +0000 Subject: [PATCH 27/37] Added toolz as an explicit dependency to pass tests on python2.7 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3b56d9265af..ebc69b18f1c 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ 'Topic :: Scientific/Engineering', ] -INSTALL_REQUIRES = ['numpy >= 1.12', 'pandas >= 0.19.2'] +INSTALL_REQUIRES = ['numpy >= 1.12', 'pandas >= 0.19.2', 'toolz >= 0.9.0'] TESTS_REQUIRE = ['pytest >= 2.7.1'] if sys.version_info[0] < 3: TESTS_REQUIRE.append('mock') From 3be70bc2f181c1bc1cf04dc4da0154854bc78239 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 1 Dec 2018 15:40:06 +0000 Subject: [PATCH 28/37] Updated 'what's new' --- doc/whats-new.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1da1da700e7..1d021a7f98e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,7 @@ What's New always be available to python 2.7 users. For more information see the following references - - `Xarray Github issue discussing dropping Python 2 `__ + - `Xarray Github issue discussing dropping Python 2 `__ - `Python 3 Statement `__ - `Tips on porting to Python 3 `__ @@ -33,6 +33,19 @@ v0.11.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ +- Auto-combine along N dimensions: + + - ``open_mfdataset`` and ``auto_combine`` can now combine datasets along any + number of dimensions, instead of just a 1D list of datasets. To combine + along multiple dimensions the datasets must be passed as a nested + list-of-lists. 
+ + Breaking because ``open_mfdataset`` and ``auto_combine`` now expect an + argument ``concat_dims`` instead of ``concat_dim``. ``concat_dims`` accepts + a list of valid ``concat_dim`` arguments, e.g. ``['dim1', 'dim2']``. + (:issue:`2159`) + By `Tom Nicholas `_. + Enhancements ~~~~~~~~~~~~ From f266bc36f082951f2f6b418f57b49ea45668f240 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 1 Dec 2018 15:52:40 +0000 Subject: [PATCH 29/37] No longer attempts to shortcut all concatenation at once if concat_dims=None --- xarray/core/combine.py | 53 ++++++++++++++++-------------------- xarray/tests/test_combine.py | 5 ++++ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index cf832b1a9d2..668348ccafe 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -377,7 +377,7 @@ def _infer_concat_order_from_positions(datasets, concat_dims): tile_id, ds = list(combined_ids.items())[0] n_dims = len(tile_id) - if concat_dims == _CONCAT_DIM_DEFAULT or concat_dims == None: + if concat_dims == _CONCAT_DIM_DEFAULT or concat_dims is None: concat_dims = [concat_dims]*n_dims else: if len(concat_dims) != n_dims: @@ -516,39 +516,34 @@ def _new_tile_id(single_id_ds_pair): def _auto_combine(datasets, concat_dims, compat, data_vars, coords, infer_order_from_coords, ids): """ - This function decides if any concatenation is necessary, and if so it calls - the logic to decide their concatenation order before concatenating. + Calls logic to decide concatenation order before concatenating. """ - if concat_dims is not None: - # Arrange datasets for concatenation - if infer_order_from_coords: - raise NotImplementedError - # TODO Use coordinates to determine tile_ID for each dataset in N-D - # Ignore how they were ordered previously - # Should look like - # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, - # concat_dims) - else: - # Use information from the shape of the user input - if not ids: - # Determine tile_IDs by structure of input in N-D - # (i.e. ordering in list-of-lists) - combined_ids, concat_dims = _infer_concat_order_from_positions\ + # Arrange datasets for concatenation + if infer_order_from_coords: + raise NotImplementedError + # TODO Use coordinates to determine tile_ID for each dataset in N-D + # Ignore how they were ordered previously + # Should look like: + # combined_ids, concat_dims = _infer_tile_ids_from_coords(datasets, + # concat_dims) + else: + # Use information from the shape of the user input + if not ids: + # Determine tile_IDs by structure of input in N-D + # (i.e. 
ordering in list-of-lists) + combined_ids, concat_dims = _infer_concat_order_from_positions\ (datasets, concat_dims) - else: - # Already sorted so just use the ids already passed - combined_ids = dict(zip(ids, datasets)) + else: + # Already sorted so just use the ids already passed + combined_ids = dict(zip(ids, datasets)) - # Check that the inferred shape is combinable - _check_shape_tile_ids(combined_ids) + # Check that the inferred shape is combinable + _check_shape_tile_ids(combined_ids) - # Repeatedly concatenate then merge along each dimension - combined = _combine_nd(combined_ids, concat_dims, compat=compat, - data_vars=data_vars, coords=coords) - else: - # Case of no concatenation wanted at all - combined = merge(datasets, compat=compat) + # Repeatedly concatenate then merge along each dimension + combined = _combine_nd(combined_ids, concat_dims, compat=compat, + data_vars=data_vars, coords=coords) return combined diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 9258dc0b5ee..4225099aa05 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -661,6 +661,11 @@ def test_combine_concat_over_redundant_nesting(self): expected = Dataset({'x': [0]}) assert_identical(expected, actual) + objs = [[Dataset({'x': [0]})]] + actual = auto_combine(objs, concat_dims=None) + expected = Dataset({'x': [0]}) + assert_identical(expected, actual) + class TestAutoCombineUsingCoords(object): def test_infer_order_from_coords_not_implemented(self): From 878e1f9d3650d0e090acc9d9395015d9046fb950 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 1 Dec 2018 20:07:50 +0000 Subject: [PATCH 30/37] Rewrote using itertools.groupby instead of toolz.itertoolz.groupby to remove hidden dependency on toolz --- setup.py | 2 +- xarray/core/combine.py | 32 +++++++++++++++++--------------- xarray/tests/test_combine.py | 3 --- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index ebc69b18f1c..3b56d9265af 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ 'Topic :: Scientific/Engineering', ] -INSTALL_REQUIRES = ['numpy >= 1.12', 'pandas >= 0.19.2', 'toolz >= 0.9.0'] +INSTALL_REQUIRES = ['numpy >= 1.12', 'pandas >= 0.19.2'] TESTS_REQUIRE = ['pytest >= 2.7.1'] if sys.version_info[0] < 3: TESTS_REQUIRE.append('mock') diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 668348ccafe..4ae7b519a84 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,12 +1,11 @@ from __future__ import absolute_import, division, print_function import warnings -from toolz import itertoolz +import itertools from collections import Counter import pandas as pd -from . import utils from .alignment import align from .merge import merge from .pycompat import OrderedDict, basestring, iteritems @@ -373,7 +372,7 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'): def _infer_concat_order_from_positions(datasets, concat_dims): - combined_ids = dict(_infer_tile_ids_from_nested_list(datasets, ())) + combined_ids = OrderedDict(_infer_tile_ids_from_nested_list(datasets, ())) tile_id, ds = list(combined_ids.items())[0] n_dims = len(tile_id) @@ -463,9 +462,9 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', # TODO refactor this logic, possibly using method in np.blocks # Perform N-D dimensional concatenation - # Each iteration of this loop reduces the length of the tile_IDs tuples - # by one. 
It always removes the first - + # Each iteration of this loop reduces the length of the tile_ids tuples + # by one. It always combines along the first dimension, removing the first + # element of the tuple for concat_dim in concat_dims: combined_ids = _auto_combine_all_along_first_dim(combined_ids, dim=concat_dim, @@ -479,13 +478,15 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat): # Group into lines of datasets which must be combined along dim - grouped = itertoolz.groupby(_new_tile_id, combined_ids.items()) + # need to sort by _new_tile_id first for groupby to work + # TODO remove all these sorted OrderedDicts once python >= 3.6 only + combined_ids = OrderedDict(sorted(combined_ids.items(), key=_new_tile_id)) + grouped = itertools.groupby(combined_ids.items(), key=_new_tile_id) new_combined_ids = {} - for new_id, group in grouped.items(): - # TODO is there a way to unpack this object without using OrderedDict? + for new_id, group in grouped: combined_ids = OrderedDict(sorted(group)) - datasets = list(combined_ids.values()) + datasets = combined_ids.values() new_combined_ids[new_id] = _auto_combine_1d(datasets, dim, compat, data_vars, coords) return new_combined_ids @@ -497,11 +498,12 @@ def _auto_combine_1d(datasets, concat_dim=_CONCAT_DIM_DEFAULT, # This is just the old auto_combine function (which only worked along 1D) if concat_dim is not None: dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - grouped = itertoolz.groupby(lambda ds: tuple(sorted(ds.data_vars)), - datasets).values() - concatenated = [_auto_concat(ds, dim=dim, + grouped = itertools.groupby(datasets, + key=lambda ds: tuple(sorted(ds.data_vars)), + ) + concatenated = [_auto_concat(list(ds_group), dim=dim, data_vars=data_vars, coords=coords) - for ds in grouped] + for id, ds_group in grouped] else: concatenated = datasets merged = merge(concatenated, compat=compat) @@ -536,7 +538,7 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords, (datasets, concat_dims) else: # Already sorted so just use the ids already passed - combined_ids = dict(zip(ids, datasets)) + combined_ids = OrderedDict(zip(ids, datasets)) # Check that the inferred shape is combinable _check_shape_tile_ids(combined_ids) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 4225099aa05..1c1313ccf5a 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -485,7 +485,6 @@ def test_uneven_length_input(self): expected = {(0, 0): ds(0), (1, 0): ds(1), (1, 1): ds(2)} actual = dict(_infer_tile_ids_from_nested_list(input, ())) - print(actual) assert_combined_tile_ids_equal(expected, actual) def test_infer_from_datasets(self): @@ -555,8 +554,6 @@ def test_concat_once(self, create_combined_ids, concat_dim): def test_concat_only_first_dim(self, create_combined_ids): shape = (2, 3) combined_ids = create_combined_ids(shape) - ds = create_test_data - print(combined_ids) result = _auto_combine_all_along_first_dim(combined_ids, dim='dim1', data_vars='all', From e6f25a31799b75eaf22a2d05a730aa400736800c Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 1 Dec 2018 20:25:35 +0000 Subject: [PATCH 31/37] Fixed erroneous removal of utils import --- xarray/core/combine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 50607392b76..adb9ed32986 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -6,6 +6,7 @@ 
import pandas as pd +from . import utils from .alignment import align from .merge import merge from .pycompat import OrderedDict, basestring, iteritems @@ -44,7 +45,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', * list of str: The listed data variables will be concatenated, in addition to the 'minimal' data variables. If objects are DataArrays, data_vars must be 'all'. - coords : {'minimal', 'different', 'all' or list of str}, optional + coords : {'minimal', 'different', 'all' o list of str}, optional These coordinate variables will be concatenated together: * 'minimal': Only coordinates in which the dimension already appears are included. From f85648548e325d9a352ffd8ac60261ef33cc2329 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 2 Dec 2018 19:31:22 +0000 Subject: [PATCH 32/37] Updated docstrings to include an example of multidimensional concatenation --- xarray/backends/api.py | 18 ++++++------- xarray/core/combine.py | 58 +++++++++++++++++++++++++++++++++++------- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 417e2c016ee..dd176b33310 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -488,14 +488,18 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, """Open multiple files as a single dataset. Requires dask to be installed. See documentation for details on dask [1]. + Uses ``auto_combine`` to combine the opened datasets - see + ``auto_combine`` for details. Attributes from the first dataset file are used for the combined dataset. Parameters ---------- paths : str or sequence Either a string glob in the form "path/to/my/files/*.nc" or an explicit - list of files to open. Paths can be given as strings or as pathlib - Paths. + list of files to open. Paths can be given as strings or as pathlib + Paths. If concatenation along more than one dimension is desired, then + ``paths`` must be a nested list-of-lists (see ``auto_combine`` for + details). chunks : int or dict, optional Dictionary with keys given by dimension names and values given by chunk sizes. In general, these should divide the dimensions of each dataset. @@ -510,8 +514,8 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, want to concatenate is not a dimension in the original datasets, e.g., if you want to stack a collection of 2D arrays along a third dimension. By default, xarray attempts to infer this argument by examining - component files. Set ``concat_dim=None`` explicitly to disable - concatenation. + component files. Set ``concat_dims=[..., None, ...]`` explicitly to + disable concatenation along a particular dimension. compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional String indicating how to compare variables of the same name for @@ -562,12 +566,6 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, those corresponding to other dimensions. * list of str: The listed coordinate variables will be concatenated, in addition the 'minimal' coordinates. - infer_order_from_coords : bool, optional - If true attempt to deduce the order in which the datasets should be - concatenated from their coordinates. To do this the coordinates should - be monotonic along the dimension to be concatenated. - If false instead read the order from the structure the datasets are - supplied in. This structure should be a nested list of lists. 
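The move from ``toolz.itertoolz.groupby`` to ``itertools.groupby`` a few patches above changes the grouping semantics: ``itertools.groupby`` only merges *consecutive* items with equal keys, so the tile-ID/dataset pairs must first be sorted by the same key function, which is exactly what the added ``sorted(..., key=_new_tile_id)`` call provides. A standalone sketch of the idea, with hypothetical names and plain strings standing in for datasets:

    import itertools

    def new_tile_id(pair):
        # Same idea as combine._new_tile_id: group on everything except
        # the first index, i.e. on position in the remaining dimensions.
        tile_id, _ = pair
        return tile_id[1:]

    # Deliberately unordered input, since plain dicts on python < 3.6
    # give no ordering guarantee -- the bug patches 22 and 23 fixed.
    items = [((1, 0), 'c'), ((0, 1), 'b'), ((1, 1), 'd'), ((0, 0), 'a')]

    grouped = [(key, [v for _, v in group])
               for key, group in itertools.groupby(
                   sorted(items, key=new_tile_id), key=new_tile_id)]
    assert grouped == [((0,), ['c', 'a']), ((1,), ['b', 'd'])]

In the real ``_auto_combine_all_along_first_dim`` each group is then re-sorted by its full tile ID (the ``OrderedDict(sorted(group))`` line) so that the datasets are concatenated in the correct order along the first dimension.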
parallel : bool, optional If True, the open and preprocess steps of this function will be performed in parallel using ``dask.delayed``. Default is False. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index adb9ed32986..5d14bf76520 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -561,6 +561,20 @@ def auto_combine(datasets, datasets into a single entity by inspecting metadata and using a combination of concat and merge. + Does not sort data under any circumstances. It does align coordinates, but + different variables on datasets can cause it to fail under some scenarios. + In complex cases, you may need to clean up your data and use concat/merge + explicitly. + + Works well if, for example, you have N years of data and M data variables, + and each combination of a distinct time period and set of data variables is + saved as its own dataset. + + Can concatenate along multiple dimensions. To do this the datasets must be + passed as a nested list-of-lists, with a depth equal to the length of + ``concat_dims``. ``auto_combine`` will concatenate along the top-level list + first. + Parameters ---------- datasets : sequence of xarray.Dataset @@ -572,8 +586,8 @@ def auto_combine(datasets, dimension in the original datasets, e.g., if you want to stack a collection of 2D arrays along a third dimension. By default, xarray attempts to infer this argument by examining - component files. Set ``concat_dim=None`` explicitly to disable - concatenation. + component files. Set ``concat_dims=[..., None, ...]`` explicitly to + disable concatenation along a particular dimension. compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional String indicating how to compare variables of the same name for @@ -591,21 +605,47 @@ def auto_combine(datasets, Details are in the documentation of concat coords : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat - infer_order_from_coords : bool, optional - If true attempt to deduce the order in which the datasets should be - concatenated from their coordinates. To do this the coordinates should - be monotonic along the dimension to be concatenated. - If false instead read the order from the structure the datasets are - supplied in. This structure should be a nested list of lists. Returns ------- combined : xarray.Dataset + Examples + -------- + + Collecting output from a parallel simulation: + + Collecting data from a simulation which decomposes its domain into 4 parts, + 2 each along both the x and y axes, requires organising the datasets into a + nested list, e.g. + + >>> x1y1 + + Dimensions: (x: 2, y: 2) + Coordinates: + lon (x, y) float64 -99.83 -99.32 -99.79 -99.23 + lat (x, y) float64 42.25 42.21 42.63 42.59 + Dimensions without coordinates: x, y + Data variables: + temperature (x, y) float64 11.04 23.57 20.77 ... + precipitation (x, y) float64 5.904 2.453 3.404 ... + + >>> ds_grid = [[x1y1, x1y2], [x2y1, x2y2]] + >>> combined = xr.auto_combine(ds_grid, concat_dims=['x', 'y']) + + Dimensions: (x: 4, y: 4) + Coordinates: + lon (x, y) float64 -99.83 -99.32 -99.79 -99.23 + lat (x, y) float64 42.25 42.21 42.63 42.59 + Dimensions without coordinates: x, y + Data variables: + temperature (x, y) float64 11.04 23.57 20.77 ... + precipitation (x, y) float64 5.904 2.453 3.404 ... 
+ See also -------- concat - Dataset.merge + merge """ # The IDs argument tells _auto_combine that the datasets are not yet sorted From 6305d83988be8d7807d90c580d5e01ce72293012 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 5 Dec 2018 16:26:15 +0000 Subject: [PATCH 33/37] Clarified auto_combine docstring for N-D behaviour --- xarray/core/combine.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 5d14bf76520..19ad58b5224 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -461,7 +461,6 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', combined_ds : xarray.Dataset """ - # TODO refactor this logic, possibly using method in np.blocks # Perform N-D dimensional concatenation # Each iteration of this loop reduces the length of the tile_ids tuples # by one. It always combines along the first dimension, removing the first @@ -557,8 +556,8 @@ def auto_combine(datasets, infer_order_from_coords=False): """Attempt to auto-magically combine the given datasets into one. - This method attempts to combine a list (or nested list of lists) of - datasets into a single entity by inspecting metadata and using a + This method attempts to combine a group of datasets along any number of + dimensions into a single entity by inspecting metadata and using a combination of concat and merge. Does not sort data under any circumstances. It does align coordinates, but @@ -577,8 +576,11 @@ def auto_combine(datasets, Parameters ---------- - datasets : sequence of xarray.Dataset - Dataset objects to merge. + datasets : sequence of xarray.Dataset, or nested list of xarray.Dataset + objects. + Dataset objects to combine. + If concatenation along more than one dimension is desired, then + datasets must be supplied in a nested list-of-lists. concat_dims : list of str, DataArray, Index or None, optional Dimensions along which to concatenate variables, as used by :py:func:`xarray.concat`. You only need to provide this argument if @@ -588,6 +590,8 @@ def auto_combine(datasets, By default, xarray attempts to infer this argument by examining component files. Set ``concat_dims=[..., None, ...]`` explicitly to disable concatenation along a particular dimension. + Must be the same length as the depth of the list passed to + ``datasets``. 
compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional String indicating how to compare variables of the same name for From ce59da139c678b5d20dedf9367635e7d12c6cd08 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 10 Dec 2018 15:14:01 +0000 Subject: [PATCH 34/37] Added unit test for nested list of Datasets with different variables --- xarray/tests/test_combine.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 1c1313ccf5a..b523ec11e50 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -642,6 +642,24 @@ def test_invalid_hypercube_input(self): with raises_regex(ValueError, 'concat_dims has length'): auto_combine(datasets, concat_dims=['dim1']) + def test_merge_one_dim_concat_another(self): + objs = [[Dataset({'foo': ('x', [0, 1])}), Dataset({'bar': ('x', [10, 20])})], + [Dataset({'foo': ('x', [2, 3])}), Dataset({'bar': ('x', [30, 40])})]] + expected = Dataset({'foo': ('x', [0, 1, 2, 3]), + 'bar': ('x', [10, 20, 30, 40])}) + + actual = auto_combine(objs, concat_dims=['x', None]) + assert_identical(expected, actual) + + actual = auto_combine(objs) + assert_identical(expected, actual) + + # Proving it works symmetrically + objs = [[Dataset({'foo': ('x', [0, 1])}), Dataset({'foo': ('x', [2, 3])})], + [Dataset({'bar': ('x', [10, 20])}), Dataset({'bar': ('x', [30, 40])})]] + actual = auto_combine(objs, concat_dims=[None, 'x']) + assert_identical(expected, actual) + def test_combine_concat_over_redundant_nesting(self): objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]] actual = auto_combine(objs, concat_dims=[None, 'x']) From 9fb34cf8e402a02dbf374ca3084d74cf08bf1f71 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 10 Dec 2018 15:33:09 +0000 Subject: [PATCH 35/37] Minor spelling and pep8 fixes --- xarray/backends/api.py | 8 ++++---- xarray/core/combine.py | 12 +++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index dd176b33310..1b4935f23b0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -602,10 +602,10 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, # If infer_order_from_coords=True then this is unnecessary, but quick. # If infer_order_from_coords=False then this creates a flat list which is # easier to iterate over, while saving the originally-supplied structure - combined_ids_paths, concat_dims = _infer_concat_order_from_positions\ - (paths, concat_dims) - ids, paths = list(combined_ids_paths.keys()), \ - list(combined_ids_paths.values()) + combined_ids_paths, concat_dims = _infer_concat_order_from_positions( + paths, concat_dims) + ids, paths = ( + list(combined_ids_paths.keys()), list(combined_ids_paths.values())) open_kwargs = dict(engine=engine, chunks=chunks or {}, lock=lock, autoclose=autoclose, **kwargs) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 19ad58b5224..a0e01cda2b7 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -45,7 +45,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', * list of str: The listed data variables will be concatenated, in addition to the 'minimal' data variables. If objects are DataArrays, data_vars must be 'all'. 
- coords : {'minimal', 'different', 'all' o list of str}, optional + coords : {'minimal', 'different', 'all' or list of str}, optional These coordinate variables will be concatenated together: * 'minimal': Only coordinates in which the dimension already appears are included. @@ -393,7 +393,7 @@ def _infer_tile_ids_from_nested_list(entry, current_pos): Given a list of lists (of lists...) of objects, returns a iterator which returns a tuple containing the index of each object in the nested list structure as the key, and the object. This can then be called by the - dict constructor to create a dictionary of the objects organised byt their + dict constructor to create a dictionary of the objects organised by their position in the original nested list. Recursively traverses the given structure, while keeping track of the @@ -424,14 +424,14 @@ def _check_shape_tile_ids(combined_tile_ids): # Check all tuples are the same length # i.e. check that all lists are nested to the same depth - nesting_depths = [len(id) for id in tile_ids] + nesting_depths = [len(tile_id) for tile_id in tile_ids] if not set(nesting_depths) == {nesting_depths[0]}: raise ValueError("The supplied objects do not form a hypercube because" " sub-lists do not have consistent depths") # Check all lists along one dimension are same length for dim in range(nesting_depths[0]): - indices_along_dim = [id[dim] for id in tile_ids] + indices_along_dim = [tile_id[dim] for tile_id in tile_ids] occurrences = Counter(indices_along_dim) if len(set(occurrences.values())) != 1: raise ValueError("The supplied objects do not form a hypercube " @@ -498,9 +498,7 @@ def _auto_combine_1d(datasets, concat_dim=_CONCAT_DIM_DEFAULT, # This is just the old auto_combine function (which only worked along 1D) if concat_dim is not None: dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - grouped = itertools.groupby(datasets, - key=lambda ds: tuple(sorted(ds.data_vars)), - ) + grouped = itertools.groupby(datasets, key=lambda ds: tuple(sorted(ds))) concatenated = [_auto_concat(list(ds_group), dim=dim, data_vars=data_vars, coords=coords) for id, ds_group in grouped] From f47486f7552909d0f724350dddf6402419f0e24d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 11 Dec 2018 10:50:16 +0000 Subject: [PATCH 36/37] Reverted API so that N-D generalisation is hidden --- doc/whats-new.rst | 13 ----- setup.py | 1 - xarray/backends/api.py | 38 +++++++------ xarray/core/combine.py | 101 +++++++++++----------------------- xarray/tests/test_backends.py | 12 ++-- xarray/tests/test_combine.py | 32 +++++------ 6 files changed, 74 insertions(+), 123 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 45c3ce35639..c058c5ce068 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,19 +33,6 @@ v0.11.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ -- Auto-combine along N dimensions: - - - ``open_mfdataset`` and ``auto_combine`` can now combine datasets along any - number of dimensions, instead of just a 1D list of datasets. To combine - along multiple dimensions the datasets must be passed as a nested - list-of-lists. - - Breaking because ``open_mfdataset`` and ``auto_combine`` now expect an - argument ``concat_dims`` instead of ``concat_dim``. ``concat_dims`` accepts - a list of valid ``concat_dim`` arguments, e.g. ``['dim1', 'dim2']``. - (:issue:`2159`) - By `Tom Nicholas `_. 
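The ``_check_shape_tile_ids`` hunk just above validates the inferred structure in two stages: consistent nesting depth first, then a ``Counter`` over the indices along each dimension. A small sketch of the same two checks on hypothetical tile IDs for a ragged 2-D input:

    from collections import Counter

    # Hypothetical tile IDs for a ragged grid: the second row is short.
    tile_ids = [(0, 0), (0, 1), (1, 0)]

    # Stage 1: every ID must have the same length (same nesting depth).
    nesting_depths = [len(tile_id) for tile_id in tile_ids]
    assert len(set(nesting_depths)) == 1

    # Stage 2: along each dimension every index must occur equally
    # often, otherwise the input cannot form a hypercube. A ragged grid
    # trips this along both dimensions here.
    for dim in range(nesting_depths[0]):
        occurrences = Counter(tile_id[dim] for tile_id in tile_ids)
        if len(set(occurrences.values())) != 1:
            print('not a hypercube along dimension ' + str(dim))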
- Enhancements ~~~~~~~~~~~~ diff --git a/setup.py b/setup.py index a9104156764..eaf57dff81d 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ 'Topic :: Scientific/Engineering', ] - INSTALL_REQUIRES = ['numpy >= 1.12', 'pandas >= 0.19.2'] SETUP_REQUIRES = ['pytest-runner >= 4.2'] TESTS_REQUIRE = ['pytest >= 2.7.1'] diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 1b4935f23b0..008606d6eb8 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -480,26 +480,20 @@ def close(self): _CONCAT_DIM_DEFAULT = '__infer_concat_dim__' -def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, +def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, compat='no_conflicts', preprocess=None, engine=None, lock=None, data_vars='all', coords='different', infer_order_from_coords=False, autoclose=None, parallel=False, **kwargs): """Open multiple files as a single dataset. - Requires dask to be installed. See documentation for details on dask [1]. - Uses ``auto_combine`` to combine the opened datasets - see - ``auto_combine`` for details. Attributes from the first dataset file are used for the combined dataset. - Parameters ---------- paths : str or sequence Either a string glob in the form "path/to/my/files/*.nc" or an explicit - list of files to open. Paths can be given as strings or as pathlib - Paths. If concatenation along more than one dimension is desired, then - ``paths`` must be a nested list-of-lists (see ``auto_combine`` for - details). + list of files to open. Paths can be given as strings or as pathlib + Paths. chunks : int or dict, optional Dictionary with keys given by dimension names and values given by chunk sizes. In general, these should divide the dimensions of each dataset. @@ -507,20 +501,19 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, By default, chunks will be chosen to load entire input files into memory at once. This has a major impact on performance: please see the full documentation for more details [2]. - concat_dims : list of str, DataArray, Index or None, optional - Dimensions to concatenate files along. This argument is passed on to + concat_dim : None, str, DataArray or Index, optional + Dimension to concatenate files along. This argument is passed on to :py:func:`xarray.auto_combine` along with the dataset objects. You only - need to provide this argument if any of the dimensions along which you - want to concatenate is not a dimension in the original datasets, e.g., - if you want to stack a collection of 2D arrays along a third dimension. + need to provide this argument if the dimension along which you want to + concatenate is not a dimension in the original datasets, e.g., if you + want to stack a collection of 2D arrays along a third dimension. By default, xarray attempts to infer this argument by examining - component files. Set ``concat_dims=[..., None, ...]`` explicitly to - disable concatenation along a particular dimension. + component files. Set ``concat_dim=None`` explicitly to disable + concatenation. compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional String indicating how to compare variables of the same name for potential conflicts when merging: - - 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. - 'equals': all values and dimensions must be the same. @@ -583,6 +576,7 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, References ---------- + .. 
[1] http://xarray.pydata.org/en/stable/dask.html .. [2] http://xarray.pydata.org/en/stable/dask.html#chunking-and-performance """ @@ -599,6 +593,16 @@ def open_mfdataset(paths, chunks=None, concat_dims=_CONCAT_DIM_DEFAULT, if not paths: raise IOError('no files to open') + # Coerce 1D input into ND to maintain backwards-compatible API until API + # for N-D combine decided + # (see https://github.com/pydata/xarray/pull/2553/#issuecomment-445892746) + if concat_dim is None or concat_dim == _CONCAT_DIM_DEFAULT: + concat_dims = concat_dim + elif not isinstance(concat_dim, list): + concat_dims = [concat_dim] + else: + concat_dims = concat_dim + # If infer_order_from_coords=True then this is unnecessary, but quick. # If infer_order_from_coords=False then this creates a flat list which is # easier to iterate over, while saving the originally-supplied structure diff --git a/xarray/core/combine.py b/xarray/core/combine.py index a0e01cda2b7..6b033dc20a9 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -548,53 +548,37 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords, def auto_combine(datasets, - concat_dims=_CONCAT_DIM_DEFAULT, + concat_dim=_CONCAT_DIM_DEFAULT, compat='no_conflicts', data_vars='all', coords='different', infer_order_from_coords=False): """Attempt to auto-magically combine the given datasets into one. - - This method attempts to combine a group of datasets along any number of - dimensions into a single entity by inspecting metadata and using a - combination of concat and merge. - - Does not sort data under any circumstances. It does align coordinates, but - different variables on datasets can cause it to fail under some scenarios. - In complex cases, you may need to clean up your data and use concat/merge - explicitly. - - Works well if, for example, you have N years of data and M data variables, - and each combination of a distinct time period and set of data variables is - saved as its own dataset. - - Can concatenate along multiple dimensions. To do this the datasets must be - passed as a nested list-of-lists, with a depth equal to the length of - ``concat_dims``. ``auto_combine`` will concatenate along the top-level list - first. - + This method attempts to combine a list of datasets into a single entity by + inspecting metadata and using a combination of concat and merge. + It does not concatenate along more than one dimension or sort data under + any circumstances. It does align coordinates, but different variables on + datasets can cause it to fail under some scenarios. In complex cases, you + may need to clean up your data and use ``concat``/``merge`` explicitly. + ``auto_combine`` works well if you have N years of data and M data + variables, and each combination of a distinct time period and set of data + variables is saved its own dataset. Parameters ---------- - datasets : sequence of xarray.Dataset, or nested list of xarray.Dataset - objects. - Dataset objects to combine. - If concatenation along more than one dimension is desired, then - datasets must be supplied in a nested list-of-lists. - concat_dims : list of str, DataArray, Index or None, optional - Dimensions along which to concatenate variables, as used by + datasets : sequence of xarray.Dataset + Dataset objects to merge. + concat_dim : str or DataArray or Index, optional + Dimension along which to concatenate variables, as used by :py:func:`xarray.concat`. 
You only need to provide this argument if - any of the dimensions along which you want to concatenate are not a - dimension in the original datasets, e.g., if you want to stack a - collection of 2D arrays along a third dimension. + the dimension along which you want to concatenate is not a dimension + in the original datasets, e.g., if you want to stack a collection of + 2D arrays along a third dimension. By default, xarray attempts to infer this argument by examining - component files. Set ``concat_dims=[..., None, ...]`` explicitly to - disable concatenation along a particular dimension. - Must be the same length as the depth of the list passed to - ``datasets``. + component files. Set ``concat_dim=None`` explicitly to disable + concatenation. compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional String indicating how to compare variables of the same name for potential conflicts: - - 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. - 'equals': all values and dimensions must be the same. @@ -606,50 +590,27 @@ def auto_combine(datasets, data_vars : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat coords : {'minimal', 'different', 'all' or list of str}, optional - Details are in the documentation of concat - + Details are in the documentation of conca Returns ------- combined : xarray.Dataset - Examples - -------- - - Collecting output from a parallel simulation: - - Collecting data from a simulation which decomposes its domain into 4 parts, - 2 each along both the x and y axes, requires organising the datasets into a - nested list, e.g. - - >>> x1y1 - - Dimensions: (x: 2, y: 2) - Coordinates: - lon (x, y) float64 -99.83 -99.32 -99.79 -99.23 - lat (x, y) float64 42.25 42.21 42.63 42.59 - Dimensions without coordinates: x, y - Data variables: - temperature (x, y) float64 11.04 23.57 20.77 ... - precipitation (x, y) float64 5.904 2.453 3.404 ... - - >>> ds_grid = [[x1y1, x1y2], [x2y1, x2y2]] - >>> combined = xr.auto_combine(ds_grid, concat_dims=['x', 'y']) - - Dimensions: (x: 4, y: 4) - Coordinates: - lon (x, y) float64 -99.83 -99.32 -99.79 -99.23 - lat (x, y) float64 42.25 42.21 42.63 42.59 - Dimensions without coordinates: x, y - Data variables: - temperature (x, y) float64 11.04 23.57 20.77 ... - precipitation (x, y) float64 5.904 2.453 3.404 ... 
- See also -------- concat - merge + Dataset.merge """ + # Coerce 1D input into ND to maintain backwards-compatible API until API + # for N-D combine decided + # (see https://github.com/pydata/xarray/pull/2553/#issuecomment-445892746) + if concat_dim is None or concat_dim == _CONCAT_DIM_DEFAULT: + concat_dims = concat_dim + elif not isinstance(concat_dim, list): + concat_dims = [concat_dim] + else: + concat_dims = concat_dim + # The IDs argument tells _auto_combine that the datasets are not yet sorted return _auto_combine(datasets, concat_dims=concat_dims, compat=compat, data_vars=data_vars, coords=coords, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 2dc950b9234..ea3d099e6ad 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2158,7 +2158,7 @@ def test_open_mfdataset_2d(self): y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset([[tmp1, tmp2], [tmp3, tmp4]], - concat_dims=['y', 'x']) as actual: + concat_dim=['y', 'x']) as actual: assert isinstance(actual.foo.variable.data, da.Array) assert actual.foo.variable.data.chunks == \ @@ -2166,7 +2166,7 @@ def test_open_mfdataset_2d(self): assert_identical(original, actual) with open_mfdataset([[tmp1, tmp2], [tmp3, tmp4]], - concat_dims=['y', 'x'], + concat_dim=['y', 'x'], chunks={'x': 3, 'y': 2}) as actual: assert actual.foo.variable.data.chunks == \ ((3, 2, 3, 2), (2, 2, 2, 2),) @@ -2204,7 +2204,7 @@ def test_open_mfdataset_2d_pathlib(self): y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset([[tmp1, tmp2], [tmp3, tmp4]], - concat_dims=['y', 'x']) as actual: + concat_dim=['y', 'x']) as actual: assert_identical(original, actual) @pytest.mark.xfail(reason="Not yet implemented") @@ -2303,7 +2303,7 @@ def test_open_mfdataset_concat_dim_none(self): data = Dataset({'x': 0}) data.to_netcdf(tmp1) Dataset({'x': np.nan}).to_netcdf(tmp2) - with open_mfdataset([tmp1, tmp2], concat_dims=None) as actual: + with open_mfdataset([tmp1, tmp2], concat_dim=None) as actual: assert_identical(data, actual) def test_open_dataset(self): @@ -2330,7 +2330,7 @@ def test_open_single_dataset(self): {'baz': [100]}) with create_tmp_file() as tmp: original.to_netcdf(tmp) - with open_mfdataset([tmp], concat_dims=[dim]) as actual: + with open_mfdataset([tmp], concat_dim=dim) as actual: assert_identical(expected, actual) def test_dask_roundtrip(self): @@ -2694,7 +2694,7 @@ def test_uamiv_format_mfread(self): ['example.uamiv', 'example.uamiv'], engine='pseudonetcdf', - concat_dims=['TSTEP'], + concat_dim=['TSTEP'], backend_kwargs={'format': 'uamiv'}) data1 = np.arange(20, dtype='f').reshape(1, 1, 4, 5) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index b523ec11e50..8e90b705124 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -356,7 +356,7 @@ def test_auto_combine_previously_failed(self): expected = Dataset({'a': (('t', 'x'), [[np.nan, 2, 3], [1, 2, np.nan]])}, {'x': [0, 1, 2]}) - actual = auto_combine(datasets, concat_dims=['t']) + actual = auto_combine(datasets, concat_dim='t') assert_identical(expected, actual) @requires_dask # only for toolz @@ -381,21 +381,21 @@ def test_auto_combine_no_concat(self): assert_identical(expected, actual) data = Dataset({'x': 0}) - actual = auto_combine([data, data, data], concat_dims=None) + actual = auto_combine([data, data, data], concat_dim=None) assert_identical(data, actual) tmp1 = Dataset({'x': 0}) tmp2 = Dataset({'x': np.nan}) - actual = auto_combine([tmp1, tmp2], concat_dims=None) + actual = 
auto_combine([tmp1, tmp2], concat_dim=None)
         assert_identical(tmp1, actual)
 
-        actual = auto_combine([tmp1, tmp2], concat_dims=[None])
+        actual = auto_combine([tmp1, tmp2], concat_dim=[None])
         assert_identical(tmp1, actual)
 
         # Single object, with a concat_dim explicitly provided
         # Test the issue reported in GH #1988
         objs = [Dataset({'x': 0, 'y': 1})]
         dim = DataArray([100], name='baz', dims='baz')
-        actual = auto_combine(objs, concat_dims=[dim])
+        actual = auto_combine(objs, concat_dim=dim)
         expected = Dataset({'x': ('baz', [0]), 'y': ('baz', [1])},
                            {'baz': [100]})
         assert_identical(expected, actual)
@@ -404,7 +404,7 @@ def test_auto_combine_no_concat(self):
         # expected for non-scalar values, too.
         objs = [Dataset({'x': ('z', [0, 1]), 'y': ('z', [1, 2])})]
         dim = DataArray([100], name='baz', dims='baz')
-        actual = auto_combine(objs, concat_dims=[dim])
+        actual = auto_combine(objs, concat_dim=dim)
         expected = Dataset({'x': (('baz', 'z'), [[0, 1]]),
                             'y': (('baz', 'z'), [[1, 2]])},
                            {'baz': [100]})
@@ -621,7 +621,7 @@ def test_auto_combine_2d(self):
         expected = concat([partway1, partway2, partway3], dim='dim2')
 
         datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]
-        result = auto_combine(datasets, concat_dims=['dim1', 'dim2'])
+        result = auto_combine(datasets, concat_dim=['dim1', 'dim2'])
 
         assert_equal(result, expected)
 
@@ -631,16 +631,16 @@ def test_invalid_hypercube_input(self):
         datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]]
         with raises_regex(ValueError, 'sub-lists do not have '
                                       'consistent lengths'):
-            auto_combine(datasets, concat_dims=['dim1', 'dim2'])
+            auto_combine(datasets, concat_dim=['dim1', 'dim2'])
 
         datasets = [[ds(0), ds(1)], [[ds(3), ds(4)]]]
         with raises_regex(ValueError, 'sub-lists do not have '
                                       'consistent depths'):
-            auto_combine(datasets, concat_dims=['dim1', 'dim2'])
+            auto_combine(datasets, concat_dim=['dim1', 'dim2'])
 
         datasets = [[ds(0), ds(1)], [ds(3), ds(4)]]
         with raises_regex(ValueError, 'concat_dims has length'):
-            auto_combine(datasets, concat_dims=['dim1'])
+            auto_combine(datasets, concat_dim=['dim1'])
 
     def test_merge_one_dim_concat_another(self):
         objs = [[Dataset({'foo': ('x', [0, 1])}), Dataset({'bar': ('x', [10, 20])})],
         expected = Dataset({'foo': ('x', [0, 1, 2, 3]),
                             'bar': ('x', [10, 20, 30, 40])})
 
-        actual = auto_combine(objs, concat_dims=['x', None])
+        actual = auto_combine(objs, concat_dim=['x', None])
         assert_identical(expected, actual)
 
         actual = auto_combine(objs)
@@ -657,27 +657,27 @@ def test_merge_one_dim_concat_another(self):
         # Proving it works symmetrically
         objs = [[Dataset({'foo': ('x', [0, 1])}), Dataset({'foo': ('x', [2, 3])})],
                 [Dataset({'bar': ('x', [10, 20])}), Dataset({'bar': ('x', [30, 40])})]]
-        actual = auto_combine(objs, concat_dims=[None, 'x'])
+        actual = auto_combine(objs, concat_dim=[None, 'x'])
         assert_identical(expected, actual)
 
     def test_combine_concat_over_redundant_nesting(self):
         objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]]
-        actual = auto_combine(objs, concat_dims=[None, 'x'])
+        actual = auto_combine(objs, concat_dim=[None, 'x'])
         expected = Dataset({'x': [0, 1]})
         assert_identical(expected, actual)
 
         objs = [[Dataset({'x': [0]})], [Dataset({'x': [1]})]]
-        actual = auto_combine(objs, concat_dims=['x', None])
+        actual = auto_combine(objs, concat_dim=['x', None])
         expected = Dataset({'x': [0, 1]})
         assert_identical(expected, actual)
 
         objs = [[Dataset({'x': [0]})]]
-        actual = auto_combine(objs, concat_dims=[None, None])
+        actual = auto_combine(objs, concat_dim=[None, None])
         expected = Dataset({'x': [0]})
         assert_identical(expected, actual)
 
         objs = [[Dataset({'x': [0]})]]
-        actual = auto_combine(objs, concat_dims=None)
+        actual = auto_combine(objs, concat_dim=None)
         expected = Dataset({'x': [0]})
         assert_identical(expected, actual)

From ebbe47f450ed4407655bd9a4ed45274b140452dd Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Wed, 12 Dec 2018 00:52:51 +0000
Subject: [PATCH 37/37] Removed infer_order_from_coords argument

---
 xarray/backends/api.py       |  2 +-
 xarray/core/combine.py       | 15 +++++++--------
 xarray/tests/test_combine.py |  8 +-------
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 008606d6eb8..fdc7badde20 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -483,7 +483,6 @@ def close(self):
 def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
                    compat='no_conflicts', preprocess=None, engine=None,
                    lock=None, data_vars='all', coords='different',
-                   infer_order_from_coords=False,
                    autoclose=None, parallel=False, **kwargs):
     """Open multiple files as a single dataset.
 
     Requires dask to be installed. See documentation for details on dask [1].
@@ -602,6 +601,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
         concat_dims = [concat_dim]
     else:
         concat_dims = concat_dim
+    infer_order_from_coords = False
 
     # If infer_order_from_coords=True then this is unnecessary, but quick.
     # If infer_order_from_coords=False then this creates a flat list which is
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index 6b033dc20a9..c9924b2ad1e 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -381,9 +381,10 @@ def _infer_concat_order_from_positions(datasets, concat_dims):
         concat_dims = [concat_dims]*n_dims
     else:
         if len(concat_dims) != n_dims:
-            raise ValueError("concat_dims has length " + str(len(concat_dims))
-                             + " but the datasets passed are nested in a " +
-                             str(n_dims) + "-dimensional structure")
+            raise ValueError("concat_dims has length {} but the datasets "
+                             "passed are nested in a {}-dimensional "
+                             "structure".format(str(len(concat_dims)),
+                                                str(n_dims)))
     return combined_ids, concat_dims
@@ -547,11 +548,8 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords,
     return combined
 
 
-def auto_combine(datasets,
-                 concat_dim=_CONCAT_DIM_DEFAULT,
-                 compat='no_conflicts',
-                 data_vars='all', coords='different',
-                 infer_order_from_coords=False):
+def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
+                 compat='no_conflicts', data_vars='all', coords='different'):
     """Attempt to auto-magically combine the given datasets into one.
 
     This method attempts to combine a list of datasets into a single entity by
     inspecting metadata and using a combination of concat and merge.
@@ -610,6 +608,7 @@ def auto_combine(datasets,
         concat_dims = [concat_dim]
     else:
         concat_dims = concat_dim
+    infer_order_from_coords = False
 
     # The IDs argument tells _auto_combine that the datasets are not yet sorted
     return _auto_combine(datasets, concat_dims=concat_dims, compat=compat,
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index 8e90b705124..ec2288b1d2d 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -683,17 +683,11 @@ def test_combine_concat_over_redundant_nesting(self):
 
 
 class TestAutoCombineUsingCoords(object):
-    def test_infer_order_from_coords_not_implemented(self):
-        data = create_test_data()
-        objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
-        with pytest.raises(NotImplementedError):
-            auto_combine(objs, infer_order_from_coords=True)
-
     def test_order_inferred_from_coords(self):
         data = create_test_data()
         objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
         with pytest.raises(NotImplementedError):
-            _auto_combine(objs, concat_dims=['dim2'],compat='no_conflicts',
+            _auto_combine(objs, concat_dims=['dim2'], compat='no_conflicts',
                           data_vars='all', coords='different',
                           infer_order_from_coords=True, ids=True)
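
Usage sketch (not part of the patches): after this series, auto_combine accepts a
nested list of datasets and one concat_dim entry per nesting level, with None
meaning "merge at this level". A minimal example mirroring
test_merge_one_dim_concat_another above, assuming this branch is installed (the
released xarray signature may differ):

    import xarray as xr

    # 2x2 grid of datasets: the outer list level is concatenated along 'x',
    # while the inner level is merged (its concat_dim entry is None).
    objs = [[xr.Dataset({'foo': ('x', [0, 1])}), xr.Dataset({'bar': ('x', [10, 20])})],
            [xr.Dataset({'foo': ('x', [2, 3])}), xr.Dataset({'bar': ('x', [30, 40])})]]

    combined = xr.auto_combine(objs, concat_dim=['x', None])
    # combined should be identical to
    # Dataset({'foo': ('x', [0, 1, 2, 3]), 'bar': ('x', [10, 20, 30, 40])})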