diff --git a/doc/source/release.rst b/doc/source/release.rst index 6d8f915e2ebb8..d4fbd288221f3 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -381,6 +381,7 @@ Bug Fixes - Bug in arithmetic operations affecting to NaT (:issue:`6873`) - Bug in ``Series.str.extract`` where the resulting ``Series`` from a single group match wasn't renamed to the group name +- Bug causing ``UnicodeEncodeError`` when ``get_dummies`` is called with unicode values and a prefix (:issue:`6885`) pandas 0.13.1 ------------- diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 7dc266617c5fd..2f4dbc2598126 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1017,7 +1017,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False): dummy_mat[cat.labels == -1] = 0 if prefix is not None: - dummy_cols = ['%s%s%s' % (prefix, prefix_sep, str(v)) + dummy_cols = ['%s%s%s' % (prefix, prefix_sep, v) for v in levels] else: dummy_cols = levels diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index b04fb979e6c8e..42427617991af 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -18,7 +18,7 @@ from pandas.core.reshape import (melt, convert_dummies, lreshape, get_dummies, wide_to_long) import pandas.util.testing as tm -from pandas.compat import StringIO, cPickle, range +from pandas.compat import StringIO, cPickle, range, u _multiprocess_can_split_ = True @@ -199,6 +199,16 @@ def test_include_na(self): exp_just_na = DataFrame(Series(1.0,index=[0]),columns=[nan]) assert_array_equal(res_just_na.values, exp_just_na.values) + def test_unicode(self): # See GH 6885 - get_dummies chokes on unicode values + import unicodedata + e = 'e' + eacute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE') + s = [e, eacute, eacute] + res = get_dummies(s, prefix='letter') + exp = DataFrame({'letter_e': {0: 1.0, 1: 0.0, 2: 0.0}, + u('letter_%s') % eacute: {0: 0.0, 1: 1.0, 2: 1.0}}) + assert_frame_equal(res, exp) + class 
TestConvertDummies(tm.TestCase): def test_convert_dummies(self): df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',