pydata · dcherian · Apr 19, 2019 · Apr 15, 2019 · Apr 17, 2019 · Apr 17, 2019
diff --git a/doc/io.rst b/doc/io.rst
@@ -302,16 +302,23 @@ to using encoded character arrays. Character arrays can be selected even for
 netCDF4 files by setting the ``dtype`` field in ``encoding`` to ``S1``
 (corresponding to NumPy's single-character bytes dtype).
 
-If character arrays are used, the string encoding that was used is stored on
-disk in the ``_Encoding`` attribute, which matches an ad-hoc convention
-`adopted by the netCDF4-Python library <https://github.com/Unidata/netcdf4-python/pull/665>`_.
-At the time of this writing (October 2017), a standard convention for indicating
-string encoding for character arrays in netCDF files was
-`still under discussion <https://github.com/Unidata/netcdf-c/issues/402>`_.
-Technically, you can use
-`any string encoding recognized by Python <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ if you feel the need to deviate from UTF-8,
-by setting the ``_Encoding`` field in ``encoding``. But
-`we don't recommend it <http://utf8everywhere.org/>`_.
+If character arrays are used:
+
+- The string encoding that was used is stored on
+  disk in the ``_Encoding`` attribute, which matches an ad-hoc convention
+  `adopted by the netCDF4-Python library <https://github.com/Unidata/netcdf4-python/pull/665>`_.
+  At the time of this writing (October 2017), a standard convention for indicating
+  string encoding for character arrays in netCDF files was
+  `still under discussion <https://github.com/Unidata/netcdf-c/issues/402>`_.
+  Technically, you can use
+  `any string encoding recognized by Python <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ if you feel the need to deviate from UTF-8,
+  by setting the ``_Encoding`` field in ``encoding``. But
+  `we don't recommend it <http://utf8everywhere.org/>`_.
+- The character dimension name can be specified by the ``char_dim_name`` field of a variable's
+  ``encoding``. If this is not specified, the default name for the character dimension is
+  ``'string%s' % data.shape[-1]``. When decoding character arrays from existing files, the
+  ``char_dim_name`` is added to the variable's ``encoding`` so that the dimension name is
+  preserved if the variable is encoded again, but the field can be edited by the user.
 
 .. warning::
 

diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py
@@ -103,16 +103,21 @@ def encode(self, variable, name=None):
         dims, data, attrs, encoding = unpack_for_encoding(variable)
         if data.dtype.kind == 'S' and encoding.get('dtype') is not str:
             data = bytes_to_char(data)
-            dims = dims + ('string%s' % data.shape[-1],)
+            if 'char_dim_name' in encoding.keys():
+                dims = dims + (encoding['char_dim_name'],)
+            else:
+                default_char_dim_name = 'string%s' % data.shape[-1]
+                dims = dims + (default_char_dim_name,)
+                encoding['char_dim_name'] = default_char_dim_name
         return Variable(dims, data, attrs, encoding)
 
     def decode(self, variable, name=None):
         dims, data, attrs, encoding = unpack_for_decoding(variable)
 
         if data.dtype == 'S1' and dims:
+            encoding['char_dim_name'] = dims[-1]
             dims = dims[:-1]
             data = char_to_bytes(data)
-
         return Variable(dims, data, attrs, encoding)
 
 

diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py
@@ -107,6 +107,35 @@ def test_CharacterArrayCoder_encode(data):
     assert_identical(actual, expected)
 
 
+@pytest.mark.parametrize(
+    'original',
+    [
+        Variable(('x',), [b'ab', b'cdef']),
+        Variable(('x',), [b'ab', b'cdef'], encoding={'char_dim_name': 'foo'})
+    ]
+)
+def test_CharacterArrayCoder_char_dim_name(original):
+    coder = strings.CharacterArrayCoder()
+
+    encoded = coder.encode(original)
+    roundtripped = coder.decode(encoded)
+    assert encoded.encoding == roundtripped.encoding
+    assert encoded.dims[-1] == encoded.encoding['char_dim_name']
+    assert roundtripped.dims[-1] == original.dims[-1]
+
+    # Comparing with the original needs a branch: encoding either preserves an
+    # existing encoding['char_dim_name'] or creates one from scratch.
+    # Hardcoding the expected names would avoid the branch but make the test
+    # brittle to changes in the parametrized data and harder to follow.
+    if 'char_dim_name' in original.encoding.keys():
+        expected_char_dim_name = original.encoding['char_dim_name']
+    else:
+        expected_char_dim_name = 'string%s' % encoded.data.shape[-1]
+
+    assert encoded.encoding['char_dim_name'] == expected_char_dim_name
+    assert roundtripped.encoding['char_dim_name'] == expected_char_dim_name
+
+
 def test_StackedBytesArray():
     array = np.array([[b'a', b'b', b'c'], [b'd', b'e', b'f']], dtype='S')
     actual = strings.StackedBytesArray(array)