diff --git a/doc/source/io.rst b/doc/source/io.rst index bbf473628cafb..2c5dac3931c0d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1001,7 +1001,7 @@ Objects can be written to the file just like adding key-value pairs to a dict: store['wp'] = wp # the type of stored data - store.handle.root.wp._v_attrs.pandas_type + store.root.wp._v_attrs.pandas_type store @@ -1037,8 +1037,7 @@ Storing in Table format ``HDFStore`` supports another ``PyTables`` format on disk, the ``table`` format. Conceptually a ``table`` is shaped very much like a DataFrame, with rows and columns. A ``table`` may be appended to in the same or other sessions. -In addition, delete & query type operations are supported. You can create an index with ``create_table_index`` -after data is already in the table (this may become automatic in the future or an option on appending/putting a ``table``). +In addition, delete & query type operations are supported. .. ipython:: python :suppress: @@ -1061,11 +1060,7 @@ after data is already in the table (this may become automatic in the future or a store.select('df') # the type of stored data - store.handle.root.df._v_attrs.pandas_type - - # create an index - store.create_table_index('df') - store.handle.root.df.table + store.root.df._v_attrs.pandas_type Hierarchical Keys ~~~~~~~~~~~~~~~~~ @@ -1090,7 +1085,7 @@ Storing Mixed Types in a Table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Storing mixed-dtype data is supported. Strings are store as a fixed-width using the maximum size of the appended column. Subsequent appends will truncate strings at this length. -Passing ``min_itemsize = { column_name : size }`` as a paremeter to append will set a larger minimum for the column. Storing ``floats, strings, ints, bools`` are currently supported. +Passing ``min_itemsize = { 'values' : size }`` as a parameter to append will set a larger minimum for the string columns. Storing ``floats, strings, ints, bools`` are currently supported. .. ipython:: python @@ -1099,11 +1094,14 @@ Passing ``min_itemsize = { column_name : size }`` as a paremeter to append will df_mixed['int'] = 1 df_mixed['bool'] = True - store.append('df_mixed',df_mixed) + store.append('df_mixed', df_mixed, min_itemsize = { 'values' : 50 }) df_mixed1 = store.select('df_mixed') df_mixed1 df_mixed1.get_dtype_counts() + # we have provided a minimum string column size + store.root.df_mixed.table + Querying a Table ~~~~~~~~~~~~~~~~ @@ -1135,41 +1133,63 @@ Queries are built up using a list of ``Terms`` (currently only **anding** of ter store store.select('wp',[ 'major_axis>20000102', ('minor_axis', '=', ['A','B']) ]) +Indexing +~~~~~~~~ +You can create an index for a table with ``create_table_index`` after data is already in the table (after an ``append/put`` operation). Creating a table index is **highly** encouraged. This will speed up your queries a great deal when you use a ``select`` with the indexed dimension as the ``where``. It is not automagically done now because you may want to index different axes than the default (except in the case of a DataFrame, where it almost always makes sense to index the ``index``). + +.. ipython:: python + + # create an index + store.create_table_index('df') + i = store.root.df.table.cols.index.index + i.optlevel, i.kind + + # change an index by passing new parameters + store.create_table_index('df', optlevel = 9, kind = 'full') + i = store.root.df.table.cols.index.index + i.optlevel, i.kind
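+ +Such a query can then take advantage of the index; a minimal sketch (reusing the ``df`` table created above): + +.. ipython:: python + + # this select can use the created index + store.select('df', [ 'index>20000102' ]) + + Delete from a Table ~~~~~~~~~~~~~~~~~~~ .. 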
ipython:: python + # returns the number of rows deleted store.remove('wp', 'major_axis>20000102' ) store.select('wp') Notes & Caveats ~~~~~~~~~~~~~~~ - - Selection by items (the top level panel dimension) is not possible; you always get all of the items in the returned Panel - Once a ``table`` is created its items (Panel) / columns (DataFrame) are fixed; only exactly the same columns can be appended - You can not append/select/delete to a non-table (table creation is determined on the first append, or by passing ``table=True`` in a put operation) - - ``PyTables`` only supports fixed-width string columns in ``tables``. The sizes of a string based indexing column (e.g. *column* or *minor_axis*) are determined as the maximum size of the elements in that axis or by passing the parameter ``min_itemsize`` on the first table creation (``min_itemsize`` can be an integer or a dict of column name to an integer). If subsequent appends introduce elements in the indexing axis that are larger than the supported indexer, an Exception will be raised (otherwise you could have a silent truncation of these indexers, leading to loss of information). This is **ONLY** necessary for storing ``Panels`` (as the indexing column is stored directly in a column) + - ``HDFStore`` is **not threadsafe for writing**. The underlying ``PyTables`` only supports concurrent reads (via threading or processes). If you need reading and writing *at the same time*, you need to serialize these operations in a single thread in a single process. You will corrupt your data otherwise. See the issue for more information. + + - ``PyTables`` only supports fixed-width string columns in ``tables``. The sizes of a string based indexing column (e.g. *columns* or *minor_axis*) are determined as the maximum size of the elements in that axis or by passing the parameter ``min_itemsize`` on the first table creation (``min_itemsize`` can be an integer or a dict of column name to an integer). If subsequent appends introduce elements in the indexing axis that are larger than the supported indexer, an Exception will be raised (otherwise you could have a silent truncation of these indexers, leading to loss of information). Just to be clear, this fixed-width restriction applies to **indexables** (the indexing columns) and **string values** in a mixed_type table. .. ipython:: python - store.append('wp_big_strings', wp, min_itemsize = 30) + store.append('wp_big_strings', wp, min_itemsize = { 'minor_axis' : 30 }) wp = wp.rename_axis(lambda x: x + '_big_strings', axis=2) store.append('wp_big_strings', wp) store.select('wp_big_strings') + # we have provided a minimum minor_axis indexable size + store.root.wp_big_strings.table + Compatibility ~~~~~~~~~~~~~ 0.10 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas, -however, query terms using the prior (undocumented) methodology are unsupported. You must read in the entire -file and write it out using the new format to take advantage of the updates. +however, query terms using the prior (undocumented) methodology are unsupported. ``HDFStore`` will issue a warning if you try to use a prior-version format file. You must read in the entire +file and write it out using the new format to take advantage of the updates. The group attribute ``pandas_version`` contains the version information.
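+ +A short sketch of such an upgrade (``legacy.h5`` and ``new.h5`` are hypothetical file names): + +.. code-block:: python + + # read each object in full from the old-format file (no query terms), + # then write it back out in the new format + legacy = HDFStore('legacy.h5', 'r') + new = HDFStore('new.h5', 'w') + + df = legacy.select('df') + new.append('df', df) + + legacy.close() + new.close() + Performance ~~~~~~~~~~~ - - ``Tables`` come with a performance penalty as compared to regular stores. 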
The benefit is the ability to append/delete and query (potentially very large amounts of data). + - ``Tables`` come with a writing performance penalty as compared to regular stores; write times are generally longer. The benefit is the ability to append/delete and query (potentially very large amounts of data). Query times can be quite fast, especially on an indexed axis. - ``Tables`` can (as of 0.10.0) be expressed as different types. @@ -1177,12 +1197,31 @@ Performance - ``WORMTable`` (pending implementation) - is available to faciliate very fast writing of tables that are also queryable (but CANNOT support appends) - To delete a lot of data, it is sometimes better to erase the table and rewrite it. ``PyTables`` tends to increase the file size with deletions - - In general it is best to store Panels with the most frequently selected dimension in the minor axis and a time/date like dimension in the major axis, but this is not required. Panels can have any major_axis and minor_axis type that is a valid Panel indexer. - - No dimensions are currently indexed automagically (in the ``PyTables`` sense); these require an explict call to ``create_table_index`` - ``Tables`` offer better performance when compressed after writing them (as opposed to turning on compression at the very beginning) use the pytables utilities ``ptrepack`` to rewrite the file (and also can change compression methods) - Duplicate rows can be written, but are filtered out in selection (with the last items being selected; thus a table is unique on major, minor pairs) +Experimental +~~~~~~~~~~~~ + +HDFStore supports ``Panel4D`` storage. + +.. ipython:: python + + p4d = Panel4D({ 'l1' : wp }) + p4d + store.append('p4d', p4d) + store + +These, by default, index the three axes ``items, major_axis, minor_axis``. On an ``AppendableTable`` it is possible to set up a different indexing scheme with the first append, depending on how you want to store your data. Pass the ``axes`` keyword with a list of dimensions (currently this must be exactly 1 less than the total number of dimensions of the object). This cannot be changed after table creation. + +.. ipython:: python + + from pandas.io.pytables import Term + store.append('p4d2', p4d, axes = ['labels','major_axis','minor_axis']) + store + store.select('p4d2', [ Term('labels=l1'), Term('items=Item1'), Term('minor_axis=A_big_strings') ])
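+ +The created table reflects this indexing scheme; you can inspect it directly: + +.. ipython:: python + + # the indexables reflect the axes passed on creation + store.root.p4d2.table + .. 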
ipython:: python :suppress: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 86627563854b3..91bd27ff510ef 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -10,6 +10,7 @@ import re import copy import itertools +import warnings import numpy as np from pandas import ( @@ -20,7 +21,7 @@ from pandas.tseries.api import PeriodIndex, DatetimeIndex from pandas.core.common import adjoin from pandas.core.algorithms import match, unique - +from pandas.core.strings import str_len from pandas.core.categorical import Factor from pandas.core.common import _asarray_tuplesafe, _try_sort from pandas.core.internals import BlockManager, make_block, form_blocks @@ -34,6 +35,11 @@ from contextlib import contextmanager +# versioning attribute +_version = '0.10' + +class IncompatibilityWarning(Warning): pass + # reading and writing the full object in one go _TYPE_MAP = { Series: 'series', @@ -85,9 +91,12 @@ def _tables(): _table_mod = tables # version requirements - major, minor, subv = tables.__version__.split('.') - if int(major) >= 2 and int(minor[0]) >= 3: - _table_supports_index = True + ver = tables.__version__.split('.') + try: + if int(ver[0]) >= 2 and int(ver[1][0]) >= 3: + _table_supports_index = True + except: + pass return _table_mod @@ -327,7 +336,7 @@ def get(self, key): raise KeyError('No object named %s in the file' % key) return self._read_group(group) - def select(self, key, where=None): + def select(self, key, where=None, **kwargs): """ Retrieve pandas object stored in file, optionally based on where criteria @@ -341,9 +350,7 @@ def select(self, key, where=None): group = self.get_node(key) if group is None: raise KeyError('No object named %s in the file' % key) - if where is not None and not _is_table_type(group): - raise Exception('can only select with where on objects written as tables') - return self._read_group(group, where) + return self._read_group(group, where, **kwargs) def put(self, key, value, table=False, append=False, compression=None, **kwargs): @@ -390,7 +397,7 @@ def remove(self, key, where=None): if group is not None: # remove the node - if where is None or not len(where): + if where is None: group = self.get_node(key) group._f_remove(recursive=True) @@ -433,8 +440,9 @@ def create_table_index(self, key, **kwargs): """ # version requirements + _tables() if not _table_supports_index: - raise("PyTables >= 2.3 is required for table indexing") + raise Exception("PyTables >= 2.3 is required for table indexing") group = self.get_node(key) if group is None: return @@ -498,6 +506,8 @@ def _write_to_group(self, key, value, table=False, append=False, wrapper(value) group._v_attrs.pandas_type = kind + group._v_attrs.pandas_version = _version + #group._v_attrs.meta = getattr(value,'meta',None) def _write_series(self, group, series): self._write_index(group, 'index', series.index) @@ -617,10 +627,6 @@ def _read_block_manager(self, group): return BlockManager(blocks, axes) - def _write_frame_table(self, group, df, append=False, comp=None, **kwargs): - t = create_table(self, group, typ = 'appendable_frame') - t.write(axes_to_index=[0], obj=df, append=append, compression=comp, **kwargs) - def _write_wide(self, group, panel): panel._consolidate_inplace() self._write_block_manager(group, panel._data) @@ -628,23 +634,33 @@ def _write_wide(self, group, panel): def _read_wide(self, group, where=None): return Panel(self._read_block_manager(group)) - def _write_wide_table(self, group, panel, append=False, comp=None, **kwargs): - t = create_table(self, group, typ = 
'appendable_panel') - t.write(axes_to_index=[1,2], obj=panel, - append=append, compression=comp, **kwargs) - - def _write_ndim_table(self, group, obj, append=False, comp=None, axes_to_index=None, **kwargs): - if axes_to_index is None: - axes_to_index=[1,2,3] + def _write_ndim_table(self, group, obj, append=False, comp=None, axes=None, **kwargs): + if axes is None: + axes = [1,2,3] t = create_table(self, group, typ = 'appendable_ndim') - t.write(axes_to_index=axes_to_index, obj=obj, + t.write(axes=axes, obj=obj, append=append, compression=comp, **kwargs) - def _read_wide_table(self, group, where=None): - t = create_table(self, group) + def _read_ndim_table(self, group, where=None, **kwargs): + t = create_table(self, group, **kwargs) return t.read(where) - _read_ndim_table = _read_wide_table + def _write_frame_table(self, group, df, append=False, comp=None, axes=None, **kwargs): + if axes is None: + axes = [0] + t = create_table(self, group, typ = 'appendable_frame') + t.write(axes=axes, obj=df, append=append, compression=comp, **kwargs) + + _read_frame_table = _read_ndim_table + + def _write_wide_table(self, group, panel, append=False, comp=None, axes=None, **kwargs): + if axes is None: + axes = [1,2] + t = create_table(self, group, typ = 'appendable_panel') + t.write(axes=axes, obj=panel, + append=append, compression=comp, **kwargs) + + _read_wide_table = _read_ndim_table def _write_index(self, group, key, index): if isinstance(index, MultiIndex): @@ -827,11 +843,16 @@ def _write_array(self, group, key, value): getattr(group, key)._v_attrs.transposed = transposed - def _read_group(self, group, where=None): + def _read_group(self, group, where=None, **kwargs): kind = group._v_attrs.pandas_type kind = _LEGACY_MAP.get(kind, kind) handler = self._get_handler(op='read', kind=kind) - return handler(group, where) + v = handler(group, where, **kwargs) + #if v is not None: + # meta = getattr(group._v_attrs,'meta',None) + # if meta is not None: + # v.meta = meta + return v def _read_series(self, group, where=None): index = self._read_index(group, 'index') @@ -860,11 +881,6 @@ def _read_index_legacy(self, group, key): kind = node._v_attrs.kind return _unconvert_index_legacy(data, kind) - def _read_frame_table(self, group, where=None): - t = create_table(self, group) - return t.read(where) - - class IndexCol(object): """ an index column description class @@ -928,6 +944,10 @@ def __repr__(self): __str__ = __repr__ + def __eq__(self, other): + """ compare 2 col items """ + return all([ getattr(self,a,None) == getattr(other,a,None) for a in ['name','cname','axis','pos'] ]) + def copy(self): new_self = copy.copy(self) return new_self @@ -981,16 +1001,22 @@ def validate_and_set(self, table, append, **kwargs): self.validate_attr(append) self.set_attr() - def validate_col(self): - """ validate this column & set table data for it """ + def validate_col(self, itemsize = None): + """ validate this column: return the existing column's itemsize that we compared against """ # validate this column for string truncation (or reset to the max size) - if self.kind == 'string': + dtype = getattr(self,'dtype',None) + if self.kind == 'string' or (dtype is not None and dtype.startswith('string')): c = self.col if c is not None: - if c.itemsize < self.itemsize: - raise Exception("[%s] column has a min_itemsize of [%s] but itemsize [%s] is required!" 
% (self.cname,self.itemsize,c.itemsize)) + if itemsize is None: + itemsize = self.itemsize + if c.itemsize < itemsize: + raise Exception("[%s] column has a min_itemsize of [%s] but itemsize [%s] is required!" % (self.cname,c.itemsize,itemsize)) + return c.itemsize + + return None def validate_attr(self, append): @@ -1034,12 +1060,21 @@ def __init__(self, values = None, kind = None, typ = None, cname = None, data = def __repr__(self): return "name->%s,cname->%s,dtype->%s,shape->%s" % (self.name,self.cname,self.dtype,self.shape) + def __eq__(self, other): + """ compare 2 col items """ + return all([ getattr(self,a,None) == getattr(other,a,None) for a in ['name','cname','dtype','pos'] ]) + def set_data(self, data): self.data = data if data is not None: if self.dtype is None: self.dtype = data.dtype.name + def take_data(self): + """ return the data & release the memory """ + self.data, data = None, self.data + return data + @property def shape(self): return getattr(self.data,'shape',None) @@ -1111,9 +1146,10 @@ class Table(object): obj_type = None ndim = None - def __init__(self, parent, group): + def __init__(self, parent, group, **kwargs): self.parent = parent self.group = group + self.version = getattr(group._v_attrs,'pandas_version',None) self.index_axes = [] self.non_index_axes = [] self.values_axes = [] @@ -1129,10 +1165,25 @@ def pandas_type(self): def __repr__(self): """ return a pretty representatgion of myself """ - return "%s (typ->%s,nrows->%s)" % (self.pandas_type,self.table_type_short,self.nrows) + self.infer_axes() + return "%s (typ->%s,nrows->%s,indexers->[%s])" % (self.pandas_type,self.table_type_short,self.nrows,','.join([ a.name for a in self.index_axes ])) __str__ = __repr__ + def copy(self): + new_self = copy.copy(self) + return new_self + + def __eq__(self, other): + """ return True if we are 'equal' to this other table (in all respects that matter) """ + for c in ['index_axes','non_index_axes','values_axes']: + if getattr(self,c,None) != getattr(other,c,None): + return False + return True + + def __ne__(self, other): + return not self.__eq__(other) + @property def nrows(self): return getattr(self.table,'nrows',None) @@ -1178,6 +1229,15 @@ def description(self): def axes(self): return itertools.chain(self.index_axes, self.values_axes) + @property + def is_transposed(self): + return False + + @property + def data_orientation(self): + """ return a tuple of my permuted axes, non_indexable at the front """ + return tuple(itertools.chain([ a[0] for a in self.non_index_axes ], [ a.axis for a in self.index_axes ])) + def queryables(self): """ return a dict of the kinds allowable columns for this object """ return dict([ (a.cname,a.kind) for a in self.index_axes ] + [ (self.obj_type._AXIS_NAMES[axis],None) for axis, values in self.non_index_axes ]) @@ -1197,6 +1257,12 @@ def set_attrs(self): self.attrs.values_cols = self.values_cols() self.attrs.non_index_axes = self.non_index_axes + def validate_version(self, where = None): + """ are we trying to operate on an old version? 
""" + if where is not None: + if self.version is None or float(self.version) < 0.1: + warnings.warn("where criteria is being ignored as we this version is too old (or not-defined) [%s]" % self.version, IncompatibilityWarning) + def validate(self): """ raise if we have an incompitable table type with the current """ et = getattr(self.attrs,'table_type',None) @@ -1243,10 +1309,7 @@ def create_index(self, columns = None, optlevel = None, kind = None): """ - table = self.table - if table is None: return - - self.infer_axes() + if not self.infer_axes(): return if columns is None: columns = [ self.index_axes[0].name ] @@ -1259,16 +1322,39 @@ def create_index(self, columns = None, optlevel = None, kind = None): if kind is not None: kw['kind'] = kind + table = self.table for c in columns: v = getattr(table.cols,c,None) - if v is not None and not v.is_indexed: - v.createIndex(**kw) + if v is not None: + + # remove the index if the kind/optlevel have changed + if v.is_indexed: + index = v.index + cur_optlevel = index.optlevel + cur_kind = index.kind + + if kind is not None and cur_kind != kind: + v.removeIndex() + else: + kw['kind'] = cur_kind + + if optlevel is not None and cur_optlevel != optlevel: + v.removeIndex() + else: + kw['optlevel'] = cur_optlevel + + # create the index + if not v.is_indexed: + v.createIndex(**kw) def read_axes(self, where): - """ create and return the axes sniffed from the table """ + """ create and return the axes sniffed from the table: return boolean for success """ + + # validate the version + self.validate_version(where) # infer the data kind - self.infer_axes() + if not self.infer_axes(): return False # create the selection self.selection = Selection(self, where) @@ -1278,25 +1364,54 @@ def read_axes(self, where): for a in self.axes: a.convert(self.selection) + return True + def infer_axes(self): - """ infer the axes from the indexables """ + """ infer the axes from the indexables: + return a boolean indicating if we have a valid table or not """ + + table = self.table + if table is None: + return False + self.index_axes, self.values_axes = [ a.infer(self.table) for a in self.indexables if a.is_indexable ], [ a.infer(self.table) for a in self.indexables if not a.is_indexable ] - self.non_index_axes = getattr(self.attrs,'non_index_axes',None) or [] + self.non_index_axes = getattr(self.attrs,'non_index_axes',None) or [] - def create_axes(self, axes_to_index, obj, validate = True, min_itemsize = None): + return True + + def get_data_blocks(self, obj): + """ return the data blocks for this obj """ + return obj._data.blocks + + def create_axes(self, axes, obj, validate = True, min_itemsize = None): """ create and return the axes leagcy tables create an indexable column, indexable index, non-indexable fields """ - self.index_axes = [] - self.non_index_axes = [] + # map axes to numbers + axes = set([ obj._get_axis_number(a) for a in axes ]) + + # do we have an existing table (if so, use its axes)? 
+ if self.infer_axes(): + existing_table = self.copy() + axes = [ a.axis for a in existing_table.index_axes] + else: + existing_table = None + + # currently only support ndim-1 indexing axes + if len(axes) != self.ndim-1: + raise Exception("currently only support ndim-1 indexers in an AppendableTable") + + # create according to the new data + self.index_axes = [] + self.non_index_axes = [] # create axes to index and non_index j = 0 for i, a in enumerate(obj.axes): - if i in axes_to_index: + if i in axes: name = obj._AXIS_NAMES[i] self.index_axes.append(_convert_index(a).set_name(name).set_axis(i).set_pos(j)) j += 1 @@ -1309,18 +1424,41 @@ def create_axes(self, axes, obj, validate = True, min_itemsize = None): for a in self.axes: a.maybe_set_size(min_itemsize = min_itemsize) + + blocks = self.get_data_blocks(obj) + # add my values self.values_axes = [] - for i, b in enumerate(obj._data.blocks): + for i, b in enumerate(blocks): + + # the shape of the data column is determined by the indexable axes + shape = b.shape[0] values = b.values # a string column if b.dtype.name == 'object': + + # itemsize is the maximum length of a string (along any dimension) + itemsize = _itemsize_string_array(values) + + # specified min_itemsize? + if isinstance(min_itemsize, dict) and min_itemsize.get('values') is not None: + itemsize = max(int(min_itemsize.get('values')),itemsize) + + # check for itemsize conflicts with an existing values column + if existing_table is not None and validate: + eci = existing_table.values_axes[i].validate_col(itemsize) + if eci > itemsize: + itemsize = eci + + atom = _tables().StringCol(itemsize = itemsize, shape = shape) + utype = 'S%s' % itemsize + kind = 'string' + else: - atom = getattr(_tables(),"%sCol" % b.dtype.name.capitalize())(shape = b.shape[0]) + atom = getattr(_tables(),"%sCol" % b.dtype.name.capitalize())(shape = shape) utype = atom._deftype + kind = b.dtype.name # coerce data to this type try: @@ -1328,10 +1466,15 @@ def create_axes(self, axes, obj, validate = True, min_itemsize = None): except (Exception), detail: raise Exception("cannot coerce data type -> [dtype->%s]" % b.dtype.name) - dc = DataCol.create_for_block(i = i, values = list(b.items), kind = b.dtype.name, typ = atom, data = values, pos = j) + dc = DataCol.create_for_block(i = i, values = list(b.items), kind = kind, typ = atom, data = values, pos = j) j += 1 self.values_axes.append(dc) + # validate the axes if we have an existing table + if existing_table is not None: + if self != existing_table: + raise Exception("trying to write axes [%s] that are incompatible with the existing table [%s]!" 
% (axes,self.group)) + def create_description(self, compression = None, complevel = None): """ create the description of the table from the axes & values """ @@ -1392,10 +1535,11 @@ class LegacyTable(Table): that can be easily searched """ - _indexables = [IndexCol(name = 'index', axis = 0, pos = 0), - IndexCol(name = 'column', axis = 1, pos = 1, index_kind = 'columns_kind'), + _indexables = [IndexCol(name = 'index', axis = 1, pos = 0), + IndexCol(name = 'column', axis = 2, pos = 1, index_kind = 'columns_kind'), DataCol( name = 'fields', cname = 'values', kind_attr = 'fields', pos = 2) ] table_type = 'legacy' + ndim = 3 def write(self, **kwargs): raise Exception("write operations are not allowed on legacy tables!") @@ -1403,8 +1547,13 @@ def write(self, **kwargs): def read(self, where=None): """ we have n indexable columns, with an arbitrary number of data axes """ - self.read_axes(where) + + _dm = create_debug_memory(self.parent) + _dm('start') + + if not self.read_axes(where): return None + _dm('read_axes') indicies = [ i.values for i in self.index_axes ] factors = [ Factor.from_array(i) for i in indicies ] levels = [ f.levels for f in factors ] @@ -1424,14 +1573,23 @@ def read(self, where=None): for c in self.values_axes: # the data need to be sorted - sorted_values = c.data.take(sorter, axis=0) + sorted_values = c.take_data().take(sorter, axis=0) take_labels = [ l.take(sorter) for l in labels ] items = Index(c.values) + _dm('pre block') + block = block2d_to_blocknd(sorted_values, items, tuple(N), take_labels) + _dm('block created done') - block = block2d_to_blocknd(sorted_values, items, tuple(N), take_labels) + # create the object mgr = BlockManager([block], [items] + levels) - objs.append(self.obj_type(mgr)) + obj = self.obj_type(mgr) + + # permute if needed + if self.is_transposed: + obj = obj.transpose(*self.data_orientation) + + objs.append(obj) else: if not self._quiet: # pragma: no cover @@ -1459,16 +1617,28 @@ def read(self, where=None): lp = DataFrame(new_values, index=new_index, columns=lp.columns) objs.append(lp.to_panel()) + _dm('pre-concat') + # create the composite object - wp = concat(objs, axis = 0, verify_integrity = True) + if len(objs) == 1: + wp = objs[0] + else: + wp = concat(objs, axis = 0, verify_integrity = True) + + _dm('post-concat') # reorder by any non_index_axes for axis,labels in self.non_index_axes: wp = wp.reindex_axis(labels,axis=axis,copy=False) + # apply the selection filters (but keep in the same order) if self.selection.filter: - new_minor = sorted(set(wp.minor_axis) & self.selection.filter) - wp = wp.reindex(minor=new_minor, copy = False) + filter_axis_name = wp._get_axis_name(self.non_index_axes[0][0]) + ordered = getattr(wp,filter_axis_name) + new_axis = sorted(ordered & self.selection.filter) + wp = wp.reindex(**{ filter_axis_name : new_axis, 'copy' : False }) + + _dm('done') return wp @@ -1489,7 +1659,7 @@ class AppendableTable(LegacyTable): _indexables = None table_type = 'appendable' - def write(self, axes_to_index, obj, append=False, compression=None, + def write(self, axes, obj, append=False, compression=None, complevel=None, min_itemsize = None, **kwargs): # create the table if it doesn't exist (or get it if it does) @@ -1498,7 +1668,7 @@ def write(self, axes_to_index, obj, append=False, compression=None, self.handle.removeNode(self.group, 'table') # create the axes - self.create_axes(axes_to_index = axes_to_index, obj = obj, validate = append, min_itemsize = min_itemsize) + self.create_axes(axes = axes, obj = obj, validate = append, 
min_itemsize = min_itemsize) if 'table' not in self.group: @@ -1530,9 +1700,8 @@ def write(self, axes_to_index, obj, append=False, compression=None, def write_data(self): """ fast writing of data: requires specific cython routines each axis shape """ + # create the masks & values masks = [] - - # create the masks for a in self.values_axes: # figure the mask: only do if we can successfully process this column, otherwise ignore the mask @@ -1549,7 +1718,7 @@ def write_data(self): for m in masks[1:]: m = mask & m - # the arguments & values + # the arguments args = [ a.cvalues for a in self.index_axes ] values = [ a.data for a in self.values_axes ] @@ -1565,14 +1734,18 @@ def write_data(self): raise Exception("tables cannot write this data -> %s" % str(detail)) def delete(self, where = None): - if where is None: - return super(LegacyTable, self).delete() + + # delete all rows (and return the nrows) + if where is None or not len(where): + nrows = self.nrows + self.handle.removeNode(self.group, recursive=True) + return nrows # infer the data kind - table = self.table - self.infer_axes() + if not self.infer_axes(): return None # create the selection + table = self.table self.selection = Selection(self, where) self.selection.select_coords() @@ -1603,32 +1776,53 @@ class AppendableFrameTable(AppendableTable): ndim = 2 obj_type = DataFrame + @property + def is_transposed(self): + return self.index_axes[0].axis == 1 + + def get_data_blocks(self, obj): + """ these are written transposed """ + if self.is_transposed: + obj = obj.T + return obj._data.blocks + def read(self, where=None): - self.read_axes(where) + if not self.read_axes(where): return None index = Index(self.index_axes[0].values) frames = [] for a in self.values_axes: columns = Index(a.values) - block = make_block(a.cvalues.T, columns, columns) - mgr = BlockManager([ block ], [ columns, index ]) + + if self.is_transposed: + values = a.cvalues + index_ = columns + columns_ = index + else: + values = a.cvalues.T + index_ = index + columns_ = columns + + block = make_block(values, columns_, columns_) + mgr = BlockManager([ block ], [ columns_, index_ ]) frames.append(DataFrame(mgr)) df = concat(frames, axis = 1, verify_integrity = True) # sort the indicies & reorder the columns for axis,labels in self.non_index_axes: df = df.reindex_axis(labels,axis=axis,copy=False) - columns_ordered = df.columns - # apply the column filter (but keep columns in the same order) - if self.selection.filter: - columns = Index(set(columns_ordered) & self.selection.filter) - columns = sorted(columns_ordered.get_indexer(columns)) - df = df.reindex(columns = columns_ordered.take(columns), copy = False) + # apply the selection filters (but keep in the same order) + filter_axis_name = df._get_axis_name(self.non_index_axes[0][0]) + ordered = getattr(df,filter_axis_name) + if self.selection.filter: + ordd = ordered & self.selection.filter + ordd = sorted(ordered.get_indexer(ordd)) + df = df.reindex(**{ filter_axis_name : ordered.take(ordd), 'copy' : False }) else: - df = df.reindex(columns = columns_ordered, copy = False) + df = df.reindex(**{ filter_axis_name : ordered , 'copy' : False }) return df @@ -1638,6 +1832,16 @@ class AppendablePanelTable(AppendableTable): ndim = 3 obj_type = Panel + def get_data_blocks(self, obj): + """ these are written transposed """ + if self.is_transposed: + obj = obj.transpose(*self.data_orientation) + return obj._data.blocks + + @property + def is_transposed(self): + return self.data_orientation != tuple(range(self.ndim)) + class 
AppendableNDimTable(AppendablePanelTable): """ suppor the new appendable table formats """ table_type = 'appendable_ndim' @@ -1659,7 +1863,7 @@ def create_table(parent, group, typ = None, **kwargs): """ return a suitable Table class to operate """ pt = getattr(group._v_attrs,'pandas_type',None) - tt = getattr(group._v_attrs,'table_type',None) + tt = getattr(group._v_attrs,'table_type',None) or typ # a new node if pt is None: @@ -1672,7 +1876,8 @@ def create_table(parent, group, typ = None, **kwargs): # distiguish between a frame/table tt = 'legacy_panel' try: - if group.table.description.values.shape[0] == 1: + fields = group.table._v_attrs.fields + if len(fields) == 1 and fields[0] == 'value': tt = 'legacy_frame' except: pass @@ -1680,6 +1885,10 @@ def create_table(parent, group, typ = None, **kwargs): return _TABLE_MAP.get(tt)(parent, group, **kwargs) +def _itemsize_string_array(arr): + """ return the maximum size of elements in a string array """ + return max([ str_len(arr[v]).max() for v in range(arr.shape[0]) ]) + def _convert_index(index): if isinstance(index, DatetimeIndex): converted = index.asi8 @@ -1957,8 +2166,11 @@ def eval(self): if not self.is_valid: raise Exception("query term is not valid [%s]" % str(self)) - # convert values - values = [ self.convert_value(v) for v in self.value ] + # convert values if we are in the table + if self.is_in_table: + values = [ self.convert_value(v) for v in self.value ] + else: + values = [ [v, v] for v in self.value ] # equality conditions if self.op in ['=','!=']: @@ -1983,14 +2195,24 @@ def eval(self): self.condition = '(%s %s %s)' % (self.field, self.op, values[0][0]) + else: + + raise Exception("passing a filterable condition to a non-table indexer [%s]" % str(self)) + def convert_value(self, v): #### a little hacky here, need to really figure out what we should convert ####x if self.field == 'index' or self.field == 'major_axis': if self.kind == 'datetime64' : return [lib.Timestamp(v).value, None] - elif isinstance(v, datetime): + elif isinstance(v, datetime) or hasattr(v,'timetuple') or self.kind == 'date': return [time.mktime(v.timetuple()), None] + elif self.kind == 'integer': + v = int(float(v)) + return [v, v] + elif self.kind == 'float': + v = float(v) + return [v, v] elif not isinstance(v, basestring): return [str(v), None] @@ -2052,7 +2274,7 @@ def select_coords(self): """ generate the selection """ - self.values = self.table.table.getWhereList(self.condition) + self.values = self.table.table.getWhereList(self.condition, sort = True) def _get_index_factory(klass): @@ -2062,3 +2284,23 @@ def f(values, freq=None, tz=None): tz=tz) return f return klass + +def create_debug_memory(parent): + _debug_memory = getattr(parent,'_debug_memory',False) + def get_memory(s): + pass + + if not _debug_memory: + pass + else: + try: + import psutil, os + def get_memory(s): + p = psutil.Process(os.getpid()) + (rss,vms) = p.get_memory_info() + mp = p.get_memory_percent() + print "[%s] cur_mem->%.2f (MB),per_mem->%.2f" % (s,rss/1000000.0,mp) + except: + pass + + return get_memory diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 7ecb0bc2fd5ee..a047109e509a9 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -2,13 +2,14 @@ import unittest import os import sys +import warnings from datetime import datetime import numpy as np from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, date_range, Index) -from pandas.io.pytables import HDFStore, get_store, Term +from 
pandas.io.pytables import HDFStore, get_store, Term, IncompatibilityWarning import pandas.util.testing as tm from pandas.tests.test_series import assert_series_equal from pandas.tests.test_frame import assert_frame_equal @@ -85,6 +86,50 @@ def test_contains(self): self.assert_('/foo/b' not in self.store) self.assert_('bar' not in self.store) + def test_versioning(self): + self.store['a'] = tm.makeTimeSeries() + self.store['b'] = tm.makeDataFrame() + df = tm.makeTimeDataFrame() + self.store.remove('df1') + self.store.append('df1', df[:10]) + self.store.append('df1', df[10:]) + self.assert_(self.store.root.a._v_attrs.pandas_version == '0.10') + self.assert_(self.store.root.b._v_attrs.pandas_version == '0.10') + self.assert_(self.store.root.df1._v_attrs.pandas_version == '0.10') + + # write a file and wipe its versioning + self.store.remove('df2') + self.store.append('df2', df) + self.store.get_node('df2')._v_attrs.pandas_version = None + self.store.select('df2') + self.store.select('df2', [ Term('index','>',df.index[2]) ]) + + def test_meta(self): + raise nose.SkipTest('no meta') + + meta = { 'foo' : [ 'I love pandas ' ] } + s = tm.makeTimeSeries() + s.meta = meta + self.store['a'] = s + self.assert_(self.store['a'].meta == meta) + + df = tm.makeDataFrame() + df.meta = meta + self.store['b'] = df + self.assert_(self.store['b'].meta == meta) + + # this should work, but because slicing doesn't propagate meta it doesn't + self.store.remove('df1') + self.store.append('df1', df[:10]) + self.store.append('df1', df[10:]) + results = self.store['df1'] + #self.assert_(getattr(results,'meta',None) == meta) + + # no meta + df = tm.makeDataFrame() + self.store['b'] = df + self.assert_(hasattr(self.store['b'],'meta') == False) + def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) @@ -131,6 +176,29 @@ def test_put(self): self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) + def test_put_string_index(self): + + index = Index([ "I am a very long string index: %s" % i for i in range(20) ]) + s = Series(np.arange(20), index = index) + df = DataFrame({ 'A' : s, 'B' : s }) + + self.store['a'] = s + tm.assert_series_equal(self.store['a'], s) + + self.store['b'] = df + tm.assert_frame_equal(self.store['b'], df) + + # mixed length + index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] + [ "I am a very long string index: %s" % i for i in range(20) ]) + s = Series(np.arange(21), index = index) + df = DataFrame({ 'A' : s, 'B' : s }) + self.store['a'] = s + tm.assert_series_equal(self.store['a'], s) + + self.store['b'] = df + tm.assert_frame_equal(self.store['b'], df) + + def test_put_compression(self): df = tm.makeTimeDataFrame() @@ -158,50 +226,106 @@ def test_put_integer(self): self._check_roundtrip(df, tm.assert_frame_equal) def test_append(self): - pth = '__test_append__.h5' - try: - store = HDFStore(pth) - - df = tm.makeTimeDataFrame() - store.append('df1', df[:10]) - store.append('df1', df[10:]) - tm.assert_frame_equal(store['df1'], df) - - store.put('df2', df[:10], table=True) - store.append('df2', df[10:]) - tm.assert_frame_equal(store['df2'], df) - - store.append('/df3', df[:10]) - store.append('/df3', df[10:]) - tm.assert_frame_equal(store['df3'], df) - - # this is allowed by almost always don't want to do it - import warnings - import tables - warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) - store.append('/df3 foo', df[:10]) - store.append('/df3 foo', df[10:]) - 
tm.assert_frame_equal(store['df3 foo'], df) - warnings.filterwarnings('always', category=tables.NaturalNameWarning) - - # panel - wp = tm.makePanel() - store.append('wp1', wp.ix[:,:10,:]) - store.append('wp1', wp.ix[:,10:,:]) - tm.assert_panel_equal(store['wp1'], wp) - - # ndim - p4d = tm.makePanel4D() - store.append('p4d', p4d.ix[:,:,:10,:]) - store.append('p4d', p4d.ix[:,:,10:,:]) - tm.assert_panel4d_equal(store['p4d'], p4d) - - except: - raise - finally: - store.close() - os.remove(pth) + df = tm.makeTimeDataFrame() + self.store.remove('df1') + self.store.append('df1', df[:10]) + self.store.append('df1', df[10:]) + tm.assert_frame_equal(self.store['df1'], df) + + self.store.remove('df2') + self.store.put('df2', df[:10], table=True) + self.store.append('df2', df[10:]) + tm.assert_frame_equal(self.store['df2'], df) + + self.store.remove('df3') + self.store.append('/df3', df[:10]) + self.store.append('/df3', df[10:]) + tm.assert_frame_equal(self.store['df3'], df) + + # this is allowed but you almost always don't want to do it + warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) + self.store.remove('/df3 foo') + self.store.append('/df3 foo', df[:10]) + self.store.append('/df3 foo', df[10:]) + tm.assert_frame_equal(self.store['df3 foo'], df) + warnings.filterwarnings('always', category=tables.NaturalNameWarning) + + # panel + wp = tm.makePanel() + self.store.remove('wp1') + self.store.append('wp1', wp.ix[:,:10,:]) + self.store.append('wp1', wp.ix[:,10:,:]) + tm.assert_panel_equal(self.store['wp1'], wp) + + # ndim + p4d = tm.makePanel4D() + self.store.remove('p4d') + self.store.append('p4d', p4d.ix[:,:,:10,:]) + self.store.append('p4d', p4d.ix[:,:,10:,:]) + tm.assert_panel4d_equal(self.store['p4d'], p4d) + + # test using axis labels + self.store.remove('p4d') + self.store.append('p4d', p4d.ix[:,:,:10,:], axes=['items','major_axis','minor_axis']) + self.store.append('p4d', p4d.ix[:,:,10:,:], axes=['items','major_axis','minor_axis']) + tm.assert_panel4d_equal(self.store['p4d'], p4d) + + def test_append_frame_column_oriented(self): + + # column oriented + df = tm.makeTimeDataFrame() + self.store.remove('df1') + self.store.append('df1', df.ix[:,:2], axes = ['columns']) + self.store.append('df1', df.ix[:,2:]) + tm.assert_frame_equal(self.store['df1'], df) + + result = self.store.select('df1', 'columns=A') + expected = df.reindex(columns=['A']) + tm.assert_frame_equal(expected, result) + + # this isn't supported + self.assertRaises(Exception, self.store.select, 'df1', ('columns=A', Term('index','>',df.index[4]))) + + # selection on the non-indexable + result = self.store.select('df1', ('columns=A', Term('index','=',df.index[0:4]))) + expected = df.reindex(columns=['A'],index=df.index[0:4]) + tm.assert_frame_equal(expected, result) + + def test_ndim_indexables(self): + """ test using ndim tables in new ways """ + + p4d = tm.makePanel4D() + + # append then change (will take existing schema) + self.store.remove('p4d') + self.store.append('p4d', p4d.ix[:,:,:10,:], axes=['items','major_axis','minor_axis']) + self.store.append('p4d', p4d.ix[:,:,10:,:], axes=['labels','items','major_axis']) + + # pass incorrect number of axes + self.store.remove('p4d') + self.assertRaises(Exception, self.store.append, 'p4d', p4d.ix[:,:,:10,:], axes=['major_axis','minor_axis']) + + # different from the default indexables + self.store.remove('p4d') + self.store.append('p4d', p4d.ix[:,:,:10,:], axes=[0,2,3]) + self.store.append('p4d', p4d.ix[:,:,10:,:], axes=[0,2,3]) + tm.assert_panel4d_equal(self.store['p4d'], p4d) 
+ + # partial selection + result = self.store.select('p4d',['labels=l1']) + expected = p4d.reindex(labels = ['l1']) + tm.assert_panel4d_equal(result, expected) + + # partial selection2 + result = self.store.select('p4d',[Term('labels=l1'), Term('items=ItemA'), Term('minor_axis=B')]) + expected = p4d.reindex(labels = ['l1'], items = ['ItemA'], minor_axis = ['B']) + tm.assert_panel4d_equal(result, expected) + + # non-existent partial selection + result = self.store.select('p4d',[Term('labels=l1'), Term('items=Item1'), Term('minor_axis=B')]) + expected = p4d.reindex(labels = ['l1'], items = [], minor_axis = ['B']) + tm.assert_panel4d_equal(result, expected) def test_append_with_strings(self): wp = tm.makePanel() @@ -228,6 +352,27 @@ def test_append_with_strings(self): self.store.append('s4', wp) self.assertRaises(Exception, self.store.append, 's4', wp2) + # avoid truncation on elements + df = DataFrame([[123,'asdqwerty'], [345,'dggnhebbsdfbdfb']]) + self.store.append('df_big',df, min_itemsize = { 'values' : 1024 }) + tm.assert_frame_equal(self.store.select('df_big'), df) + + # appending smaller string ok + df2 = DataFrame([[124,'asdqy'], [346,'dggnhefbdfb']]) + self.store.append('df_big',df2) + expected = concat([ df, df2 ]) + tm.assert_frame_equal(self.store.select('df_big'), expected) + + # avoid truncation on elements + df = DataFrame([[123,'asdqwerty'], [345,'dggnhebbsdfbdfb']]) + self.store.append('df_big2',df, min_itemsize = { 'values' : 10 }) + tm.assert_frame_equal(self.store.select('df_big2'), df) + + # bigger string on next append + self.store.append('df_new',df, min_itemsize = { 'values' : 16 }) + df_new = DataFrame([[124,'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) + self.assertRaises(Exception, self.store.append, 'df_new',df_new) + def test_create_table_index(self): wp = tm.makePanel() self.store.append('p5', wp) @@ -236,14 +381,29 @@ def test_create_table_index(self): assert(self.store.handle.root.p5.table.cols.major_axis.is_indexed == True) assert(self.store.handle.root.p5.table.cols.minor_axis.is_indexed == False) + # default optlevels + assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 6) + assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') + + # let's change the indexing scheme + self.store.create_table_index('p5') + assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 6) + assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') + self.store.create_table_index('p5', optlevel=9) + assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 9) + assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') + self.store.create_table_index('p5', kind='full') + assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 9) + assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'full') + self.store.create_table_index('p5', optlevel=1, kind='light') + assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 1) + assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'light') + df = tm.makeTimeDataFrame() self.store.append('f', df[:10]) self.store.append('f', df[10:]) self.store.create_table_index('f') - # create twice - self.store.create_table_index('f') - # try to index a non-table self.store.put('f2', df) self.assertRaises(Exception, self.store.create_table_index, 'f2') @@ -253,6 +413,43 @@ def test_create_table_index(self): pytables._table_supports_index = False self.assertRaises(Exception, 
self.store.create_table_index, 'f') + # test out some versions + original = tables.__version__ + + for v in ['2.2','2.2b']: + pytables._table_mod = None + pytables._table_supports_index = False + tables.__version__ = v + self.assertRaises(Exception, self.store.create_table_index, 'f') + + for v in ['2.3.1','2.3.1b','2.4dev','2.4',original]: + pytables._table_mod = None + pytables._table_supports_index = False + tables.__version__ = v + self.store.create_table_index('f') + pytables._table_mod = None + pytables._table_supports_index = False + tables.__version__ = original + + + def test_big_table(self): + raise nose.SkipTest('no big table') + + # create and write a big table + wp = Panel(np.random.randn(20, 1000, 1000), items= [ 'Item%s' % i for i in xrange(20) ], + major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%s' % i for i in xrange(1000) ]) + + wp.ix[:,100:200,300:400] = np.nan + + try: + store = HDFStore(self.scratchpath) + store._debug_memory = True + store.append('wp',wp) + recons = store.select('wp') + finally: + store.close() + os.remove(self.scratchpath) + def test_append_diff_item_order(self): wp = tm.makePanel() wp1 = wp.ix[:, :10, :] @@ -317,6 +514,21 @@ def _make_one_panel(): self.store.append('p1_mixed', p1) tm.assert_panel_equal(self.store.select('p1_mixed'), p1) + # ndim + def _make_one_p4d(): + wp = tm.makePanel4D() + wp['obj1'] = 'foo' + wp['obj2'] = 'bar' + wp['bool1'] = wp['l1'] > 0 + wp['bool2'] = wp['l2'] > 0 + wp['int1'] = 1 + wp['int2'] = 2 + return wp.consolidate() + + p4d = _make_one_p4d() + self.store.append('p4d_mixed', p4d) + tm.assert_panel4d_equal(self.store.select('p4d_mixed'), p4d) + def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() @@ -366,7 +578,10 @@ def test_remove_where(self): # empty where self.store.remove('wp') self.store.put('wp', wp, table=True) - self.store.remove('wp', []) + + # deleted number (entire table) + n = self.store.remove('wp', []) + assert(n == 120) # non - empty where self.store.remove('wp') @@ -375,9 +590,9 @@ def test_remove_where(self): 'wp', ['foo']) # selectin non-table with a where - self.store.put('wp2', wp, table=False) - self.assertRaises(Exception, self.store.remove, - 'wp2', [('column', ['A', 'D'])]) + #self.store.put('wp2', wp, table=False) + #self.assertRaises(Exception, self.store.remove, + # 'wp2', [('column', ['A', 'D'])]) def test_remove_crit(self): @@ -387,8 +602,14 @@ def test_remove_crit(self): crit1 = Term('major_axis','>',date) crit2 = Term('minor_axis',['A', 'D']) - self.store.remove('wp', where=[crit1]) - self.store.remove('wp', where=[crit2]) + n = self.store.remove('wp', where=[crit1]) + + # deleted number + assert(n == 56) + + n = self.store.remove('wp', where=[crit2]) + assert(n == 32) + result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) @@ -447,8 +668,8 @@ def test_terms(self): tm.assert_panel_equal(result, expected) # p4d - result = self.store.select('p4d',[ Term('major_axis<20000108'), Term('minor_axis', '=', ['A','B']) ]) - expected = p4d.truncate(after='20000108').reindex(minor=['A', 'B']) + result = self.store.select('p4d',[ Term('major_axis<20000108'), Term('minor_axis', '=', ['A','B']), Term('items', '=', ['ItemA','ItemB']) ]) + expected = p4d.truncate(after='20000108').reindex(minor=['A', 'B'],items=['ItemA','ItemB']) tm.assert_panel4d_equal(result, expected) # valid terms @@ -464,12 +685,23 @@ def test_terms(self): (('minor_axis', ['A','B']),), (('minor_axis', ['A','B']),), 
((('minor_axis', ['A','B']),),), + (('items', ['ItemA','ItemB']),), + ('items=ItemA'), ] for t in terms: self.store.select('wp', t) self.store.select('p4d', t) + # valid for p4d only + terms = [ + (('labels', '=', ['l1','l2']),), + Term('labels', '=', ['l1','l2']), + ] + + for t in terms: + self.store.select('p4d', t) + def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) @@ -797,8 +1029,8 @@ def test_select(self): self.store.select('wp2') # selectin non-table with a where - self.assertRaises(Exception, self.store.select, - 'wp2', ('column', ['A', 'D'])) + #self.assertRaises(Exception, self.store.select, + # 'wp2', ('column', ['A', 'D'])) def test_panel_select(self): wp = tm.makePanel() @@ -833,10 +1065,21 @@ def test_frame_select(self): expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) + # other indicies for a frame + + # integer + df = DataFrame(dict(A = np.random.rand(20), B = np.random.rand(20))) + self.store.append('df_int', df) + self.store.select('df_int', [ Term("index<10"), Term("columns", "=", ["A"]) ]) + + df = DataFrame(dict(A = np.random.rand(20), B = np.random.rand(20), index = np.arange(20,dtype='f8'))) + self.store.append('df_float', df) + self.store.select('df_float', [ Term("index<10.0"), Term("columns", "=", ["A"]) ]) + # can't select if not written as table - self.store['frame'] = df - self.assertRaises(Exception, self.store.select, - 'frame', [crit1, crit2]) + #self.store['frame'] = df + #self.assertRaises(Exception, self.store.select, + # 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) @@ -911,6 +1154,16 @@ def test_legacy_table_read(self): store.select('df1') store.select('df2') store.select('wp1') + + # force the frame + store.select('df2', typ = 'legacy_frame') + + # old version (this still throws an exception though) + import warnings + warnings.filterwarnings('ignore', category=IncompatibilityWarning) + self.assertRaises(Exception, store.select, 'wp1', Term('minor_axis','=','B')) + warnings.filterwarnings('always', category=IncompatibilityWarning) + store.close() def test_legacy_table_write(self):