diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 196c881f97526..fb3fdafce9c85 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -1127,6 +1127,28 @@ def join(self, other, how='left', level=None, return_indexers=False,
         return Index.join(this, other, how=how, level=level,
                           return_indexers=return_indexers, sort=sort)
 
+    def _tz_compare(self, other):
+        """
+        Return whether ``self`` and ``other`` have the same timezone string.
+
+        The ``tzinfo`` objects are compared by ``str()`` because comparing
+        them directly is unreliable: one zone can be represented by several
+        unequal instances, e.g. `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>`
+        and `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>`, which both have
+        the string representation `'Europe/Paris'`.
+
+        Parameters
+        ----------
+        other : DatetimeIndex
+
+        Returns
+        -------
+        compare : bool
+            True if ``str(self.tzinfo)`` equals ``str(other.tzinfo)``.
+        """
+        # GH 18523: compare str() forms, not the tzinfo objects themselves
+        return str(self.tzinfo) == str(other.tzinfo)
+
     def _maybe_utc_convert(self, other):
         this = self
         if isinstance(other, DatetimeIndex):
@@ -1138,7 +1160,7 @@ def _maybe_utc_convert(self, other):
                 raise TypeError('Cannot join tz-naive with tz-aware '
                                 'DatetimeIndex')
 
-            if self.tz != other.tz:
+            if not self._tz_compare(other):
                 this = self.tz_convert('UTC')
                 other = other.tz_convert('UTC')
         return this, other
@@ -1243,7 +1265,7 @@ def __iter__(self):
 
     def _wrap_union_result(self, other, result):
         name = self.name if self.name == other.name else None
-        if self.tz != other.tz:
+        if not self._tz_compare(other):
             raise ValueError('Passed item and index have different timezone')
         return self._simple_new(result, name=name, freq=None, tz=self.tz)
 
diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py
index b59dd25ead57f..588d5968bc932 100644
--- a/pandas/tests/indexes/datetimes/test_construction.py
+++ b/pandas/tests/indexes/datetimes/test_construction.py
@@ -443,6 +443,26 @@ def test_000constructor_resolution(self):
 
         assert idx.nanosecond[0] == t1.nanosecond
 
+    def test_concat(self):
+        idx1 = pd.date_range('2011-01-01', periods=3, freq='H',
+                             tz='Europe/Paris')
+        idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H')
+        df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1)
+        df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2)
+        res = pd.concat([df1, df2], axis=1)
+
+        assert str(res.index.tzinfo) == str(df1.index.tzinfo)
+        assert str(res.index.tzinfo) == str(df2.index.tzinfo)
+
+        idx3 = pd.date_range('2011-01-01', periods=3,
+                             freq='H', tz='Asia/Tokyo')
+        df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3)
+        res = pd.concat([df1, df3], axis=1)
+
+        assert str(res.index.tzinfo) == 'UTC'
+        assert str(res.index.tzinfo) != str(df1.index.tzinfo)
+        assert str(res.index.tzinfo) != str(df3.index.tzinfo)
+
 
 class TestTimeSeries(object):
 
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index a1287c3102b77..2f0beefbac5ce 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -892,7 +892,7 @@ def test_to_datetime_freq(self):
         xp = bdate_range('2000-1-1', periods=10, tz='UTC')
         rs = xp.to_datetime()
         assert xp.freq == rs.freq
-        assert xp.tzinfo == rs.tzinfo
+        assert xp._tz_compare(rs)
 
     def test_to_datetime_overflow(self):
         # gh-17637
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index ae41502f237f1..c4f0b6a2b64f5 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1959,6 +1959,45 @@ def test_concat_order(self):
         expected = expected.sort_values()
         tm.assert_index_equal(result, expected)
 
+    def test_concat_datetime_timezone(self):
+        # GH 18523: equal tzs are kept, differing tzs are converted to UTC
+        idx1 = pd.date_range('2011-01-01', periods=3, freq='H',
+                             tz='Europe/Paris')
+        idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H')
+        df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1)
+        df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2)
+        res = pd.concat([df1, df2], axis=1)
+
+        exp_idx = DatetimeIndex(['2011-01-01 00:00:00+01:00',
+                                 '2011-01-01 01:00:00+01:00',
+                                 '2011-01-01 02:00:00+01:00'],
+                                freq='H'
+                                ).tz_localize('UTC').tz_convert('Europe/Paris')
+
+        exp = pd.DataFrame([[1, 1], [2, 2], [3, 3]],
+                           index=exp_idx, columns=['a', 'b'])
+
+        tm.assert_frame_equal(res, exp)
+
+        idx3 = pd.date_range('2011-01-01', periods=3,
+                             freq='H', tz='Asia/Tokyo')
+        df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3)
+        res = pd.concat([df1, df3], axis=1)
+
+        exp_idx = DatetimeIndex(['2010-12-31 15:00:00+00:00',
+                                 '2010-12-31 16:00:00+00:00',
+                                 '2010-12-31 17:00:00+00:00',
+                                 '2010-12-31 23:00:00+00:00',
+                                 '2011-01-01 00:00:00+00:00',
+                                 '2011-01-01 01:00:00+00:00']
+                                ).tz_localize('UTC')
+
+        exp = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3],
+                            [1, np.nan], [2, np.nan], [3, np.nan]],
+                           index=exp_idx, columns=['a', 'b'])
+
+        tm.assert_frame_equal(res, exp)
+
     @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
     @pytest.mark.parametrize('dt', np.sctypes['float'])