Skip to content

Commit be25266

Browse files
committed
Merge pull request #3549 from jreback/hdf_bug2
BUG/TST: fixed up retaining of index names in the table .info (like freq/tz)
2 parents ba19ff9 + acfbc37 commit be25266

File tree

4 files changed

+80
-43
lines changed

4 files changed

+80
-43
lines changed

RELEASE.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ pandas 0.11.1
4141
- ``HDFStore``
4242
4343
- will retain index attributes (freq,tz,name) on recreation (GH3499_)
44-
- will warn with a FrequencyWarning if you are attempting to append
45-
an index with a different frequency than the existing
44+
- will warn with an AttributeConflictWarning if you are attempting to append
45+
an index with a different frequency than the existing, or attempting
46+
to append an index with a different name than the existing
4647
- support datelike columns with a timezone as data_columns (GH2852_)
4748
- table writing performance improvements.
4849

doc/source/v0.11.1.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ Enhancements
1717
- ``HDFStore``
1818

1919
- will retain index attributes (freq,tz,name) on recreation (GH3499_)
20-
- will warn with a FrequencyWarning if you are attempting to append
21-
an index with a different frequency than the existing
20+
- will warn with an AttributeConflictWarning if you are attempting to append
21+
an index with a different frequency than the existing, or attempting
22+
to append an index with a different name than the existing
2223
- support datelike columns with a timezone as data_columns (GH2852_)
2324

2425
See the `full release notes

pandas/io/pytables.py

+52-30
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@ class IncompatibilityWarning(Warning): pass
4242
where criteria is being ignored as this version [%s] is too old (or not-defined),
4343
read the file in and write it out to a new file to upgrade (with the copy_to method)
4444
"""
45-
class FrequencyWarning(Warning): pass
46-
frequency_doc = """
47-
the frequency of the existing index is [%s] which conflicts with the new freq [%s],
48-
resetting the frequency to None
45+
class AttributeConflictWarning(Warning): pass
46+
attribute_conflict_doc = """
47+
the [%s] attribute of the existing index is [%s] which conflicts with the new [%s],
48+
resetting the attribute to None
4949
"""
5050
class PerformanceWarning(Warning): pass
5151
performance_doc = """
@@ -873,9 +873,9 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, com
873873
if not s.is_table or (s.is_table and table is None and s.is_exists):
874874
raise ValueError('Can only append to Tables')
875875
if not s.is_exists:
876-
s.set_info()
876+
s.set_object_info()
877877
else:
878-
s.set_info()
878+
s.set_object_info()
879879

880880
if not s.is_table and complib:
881881
raise ValueError('Compression not supported on non-table')
@@ -949,7 +949,7 @@ class IndexCol(object):
949949
is_an_indexable = True
950950
is_data_indexable = True
951951
is_searchable = False
952-
_info_fields = ['freq','tz','name']
952+
_info_fields = ['freq','tz','index_name']
953953

954954
def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None,
955955
name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None,
@@ -965,7 +965,7 @@ def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None,
965965
self.pos = pos
966966
self.freq = freq
967967
self.tz = tz
968-
self.index_name = None
968+
self.index_name = index_name
969969
self.table = None
970970

971971
if name is not None:
@@ -1042,7 +1042,7 @@ def convert(self, values, nan_rep):
10421042
kwargs['freq'] = self.freq
10431043
if self.tz is not None:
10441044
kwargs['tz'] = self.tz
1045-
if self.name is not None:
1045+
if self.index_name is not None:
10461046
kwargs['name'] = self.index_name
10471047
try:
10481048
self.values = Index(_maybe_convert(values, self.kind), **kwargs)
@@ -1128,7 +1128,7 @@ def validate_attr(self, append):
11281128

11291129
def update_info(self, info):
11301130
""" set/update the info for this indexable with the key/value
1131-
if validate is True, then raise if an existing value does not match the value """
1131+
if there is a conflict raise/warn as needed """
11321132

11331133
for key in self._info_fields:
11341134

@@ -1140,15 +1140,16 @@ def update_info(self, info):
11401140
idx = info[self.name] = dict()
11411141

11421142
existing_value = idx.get(key)
1143-
if key in idx and existing_value != value:
1143+
if key in idx and value is not None and existing_value != value:
11441144

1145-
# frequency just warn
1146-
if key == 'freq':
1147-
ws = frequency_doc % (existing_value,value)
1148-
warnings.warn(ws, FrequencyWarning)
1145+
# frequency/name just warn
1146+
if key in ['freq','index_name']:
1147+
ws = attribute_conflict_doc % (key,existing_value,value)
1148+
warnings.warn(ws, AttributeConflictWarning)
11491149

11501150
# reset
11511151
idx[key] = None
1152+
setattr(self,key,None)
11521153

11531154
else:
11541155
raise ValueError("invalid info for [%s] for [%s]"""
@@ -1554,7 +1555,7 @@ def __repr__(self):
15541555
def __str__(self):
15551556
return self.__repr__()
15561557

1557-
def set_info(self):
1558+
def set_object_info(self):
15581559
""" set my pandas type & version """
15591560
self.attrs.pandas_type = self.pandas_kind
15601561
self.attrs.pandas_version = _version
@@ -2275,16 +2276,20 @@ def values_cols(self):
22752276
""" return a list of my values cols """
22762277
return [i.cname for i in self.values_axes]
22772278

2279+
def set_info(self):
2280+
""" update our table index info """
2281+
self.attrs.info = self.info
2282+
22782283
def set_attrs(self):
22792284
""" set our table type & indexables """
22802285
self.attrs.table_type = self.table_type
22812286
self.attrs.index_cols = self.index_cols()
22822287
self.attrs.values_cols = self.values_cols()
22832288
self.attrs.non_index_axes = self.non_index_axes
22842289
self.attrs.data_columns = self.data_columns
2285-
self.attrs.info = self.info
22862290
self.attrs.nan_rep = self.nan_rep
22872291
self.attrs.levels = self.levels
2292+
self.set_info()
22882293

22892294
def get_attrs(self):
22902295
""" retrieve our attributes """
@@ -2487,7 +2492,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
24872492
axes = [ a.axis for a in existing_table.index_axes]
24882493
data_columns = existing_table.data_columns
24892494
nan_rep = existing_table.nan_rep
2490-
self.info = existing_table.info
2495+
self.info = copy.copy(existing_table.info)
24912496
else:
24922497
existing_table = None
24932498

@@ -2879,6 +2884,9 @@ def write(self, obj, axes=None, append=False, complib=None,
28792884
else:
28802885
table = self.table
28812886

2887+
# update my info
2888+
self.set_info()
2889+
28822890
# validate the axes and set the kinds
28832891
for a in self.axes:
28842892
a.validate_and_set(table, append)
@@ -3036,10 +3044,10 @@ def read(self, where=None, columns=None, **kwargs):
30363044
if self.is_transposed:
30373045
values = a.cvalues
30383046
index_ = cols
3039-
cols_ = Index(index)
3047+
cols_ = Index(index,name=getattr(index,'name',None))
30403048
else:
30413049
values = a.cvalues.T
3042-
index_ = Index(index)
3050+
index_ = Index(index,name=getattr(index,'name',None))
30433051
cols_ = cols
30443052

30453053
# if we have a DataIndexableCol, its shape will only be 1 dim
@@ -3157,12 +3165,17 @@ class AppendableNDimTable(AppendablePanelTable):
31573165
obj_type = Panel4D
31583166

31593167
def _convert_index(index):
3168+
index_name = getattr(index,'name',None)
3169+
31603170
if isinstance(index, DatetimeIndex):
31613171
converted = index.asi8
3162-
return IndexCol(converted, 'datetime64', _tables().Int64Col(), freq=getattr(index,'freq',None), tz=getattr(index,'tz',None))
3172+
return IndexCol(converted, 'datetime64', _tables().Int64Col(),
3173+
freq=getattr(index,'freq',None), tz=getattr(index,'tz',None),
3174+
index_name=index_name)
31633175
elif isinstance(index, (Int64Index, PeriodIndex)):
31643176
atom = _tables().Int64Col()
3165-
return IndexCol(index.values, 'integer', atom, freq=getattr(index,'freq',None))
3177+
return IndexCol(index.values, 'integer', atom, freq=getattr(index,'freq',None),
3178+
index_name=index_name)
31663179

31673180
if isinstance(index, MultiIndex):
31683181
raise Exception('MultiIndex not supported here!')
@@ -3173,36 +3186,45 @@ def _convert_index(index):
31733186

31743187
if inferred_type == 'datetime64':
31753188
converted = values.view('i8')
3176-
return IndexCol(converted, 'datetime64', _tables().Int64Col())
3189+
return IndexCol(converted, 'datetime64', _tables().Int64Col(),
3190+
freq=getattr(index,'freq',None), tz=getattr(index,'tz',None),
3191+
index_name=index_name)
31773192
elif inferred_type == 'datetime':
31783193
converted = np.array([(time.mktime(v.timetuple()) +
31793194
v.microsecond / 1E6) for v in values],
31803195
dtype=np.float64)
3181-
return IndexCol(converted, 'datetime', _tables().Time64Col())
3196+
return IndexCol(converted, 'datetime', _tables().Time64Col(),
3197+
index_name=index_name)
31823198
elif inferred_type == 'date':
31833199
converted = np.array([time.mktime(v.timetuple()) for v in values],
31843200
dtype=np.int32)
3185-
return IndexCol(converted, 'date', _tables().Time32Col())
3201+
return IndexCol(converted, 'date', _tables().Time32Col(),
3202+
index_name=index_name)
31863203
elif inferred_type == 'string':
31873204
# atom = _tables().ObjectAtom()
31883205
# return np.asarray(values, dtype='O'), 'object', atom
31893206

31903207
converted = np.array(list(values), dtype=np.str_)
31913208
itemsize = converted.dtype.itemsize
3192-
return IndexCol(converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize)
3209+
return IndexCol(converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize,
3210+
index_name=index_name)
31933211
elif inferred_type == 'unicode':
31943212
atom = _tables().ObjectAtom()
3195-
return IndexCol(np.asarray(values, dtype='O'), 'object', atom)
3213+
return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
3214+
index_name=index_name)
31963215
elif inferred_type == 'integer':
31973216
# take a guess for now, hope the values fit
31983217
atom = _tables().Int64Col()
3199-
return IndexCol(np.asarray(values, dtype=np.int64), 'integer', atom)
3218+
return IndexCol(np.asarray(values, dtype=np.int64), 'integer', atom,
3219+
index_name=index_name)
32003220
elif inferred_type == 'floating':
32013221
atom = _tables().Float64Col()
3202-
return IndexCol(np.asarray(values, dtype=np.float64), 'float', atom)
3222+
return IndexCol(np.asarray(values, dtype=np.float64), 'float', atom,
3223+
index_name=index_name)
32033224
else: # pragma: no cover
32043225
atom = _tables().ObjectAtom()
3205-
return IndexCol(np.asarray(values, dtype='O'), 'object', atom)
3226+
return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
3227+
index_name=index_name)
32063228

32073229
def _unconvert_index(data, kind):
32083230
if kind == 'datetime64':

pandas/io/tests/test_pytables.py

+22-9
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
import pandas
1111
from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
1212
date_range, Index)
13-
from pandas.io.pytables import (HDFStore, get_store, Term,
13+
from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf,
1414
IncompatibilityWarning, PerformanceWarning,
15-
FrequencyWarning)
15+
AttributeConflictWarning)
1616
import pandas.util.testing as tm
1717
from pandas.tests.test_series import assert_series_equal
1818
from pandas.tests.test_frame import assert_frame_equal
@@ -109,8 +109,6 @@ def test_conv_read_write(self):
109109

110110
try:
111111

112-
from pandas import read_hdf
113-
114112
def roundtrip(key, obj,**kwargs):
115113
obj.to_hdf(self.path, key,**kwargs)
116114
return read_hdf(self.path, key)
@@ -2089,17 +2087,17 @@ def test_retain_index_attributes(self):
20892087
result = store.get('data')
20902088
tm.assert_frame_equal(df,result)
20912089

2092-
for attr in ['freq','tz']:
2090+
for attr in ['freq','tz','name']:
20932091
for idx in ['index','columns']:
20942092
self.assert_(getattr(getattr(df,idx),attr,None) == getattr(getattr(result,idx),attr,None))
20952093

20962094

20972095
# try to append a table with a different frequency
2098-
warnings.filterwarnings('ignore', category=FrequencyWarning)
2096+
warnings.filterwarnings('ignore', category=AttributeConflictWarning)
20992097
df2 = DataFrame(dict(A = Series(xrange(3),
21002098
index=date_range('2002-1-1',periods=3,freq='D'))))
21012099
store.append('data',df2)
2102-
warnings.filterwarnings('always', category=FrequencyWarning)
2100+
warnings.filterwarnings('always', category=AttributeConflictWarning)
21032101

21042102
self.assert_(store.get_storer('data').info['index']['freq'] is None)
21052103

@@ -2114,12 +2112,27 @@ def test_retain_index_attributes(self):
21142112
def test_retain_index_attributes2(self):
21152113

21162114
with tm.ensure_clean(self.path) as path:
2117-
warnings.filterwarnings('ignore', category=FrequencyWarning)
2115+
2116+
warnings.filterwarnings('ignore', category=AttributeConflictWarning)
2117+
21182118
df = DataFrame(dict(A = Series(xrange(3), index=date_range('2000-1-1',periods=3,freq='H'))))
21192119
df.to_hdf(path,'data',mode='w',append=True)
21202120
df2 = DataFrame(dict(A = Series(xrange(3), index=date_range('2002-1-1',periods=3,freq='D'))))
21212121
df2.to_hdf(path,'data',append=True)
2122-
warnings.filterwarnings('always', category=FrequencyWarning)
2122+
2123+
idx = date_range('2000-1-1',periods=3,freq='H')
2124+
idx.name = 'foo'
2125+
df = DataFrame(dict(A = Series(xrange(3), index=idx)))
2126+
df.to_hdf(path,'data',mode='w',append=True)
2127+
self.assert_(read_hdf(path,'data').index.name == 'foo')
2128+
2129+
idx2 = date_range('2001-1-1',periods=3,freq='H')
2130+
idx2.name = 'bar'
2131+
df2 = DataFrame(dict(A = Series(xrange(3), index=idx2)))
2132+
df2.to_hdf(path,'data',append=True)
2133+
self.assert_(read_hdf(path,'data').index.name is None)
2134+
2135+
warnings.filterwarnings('always', category=AttributeConflictWarning)
21232136

21242137
def test_panel_select(self):
21252138

0 commit comments

Comments
 (0)