diff --git a/ci/lint.sh b/ci/lint.sh
index a4c960084040f..9f582f72fcdd7 100755
--- a/ci/lint.sh
+++ b/ci/lint.sh
@@ -8,7 +8,7 @@ RET=0
 if [ "$LINT" ]; then
     echo "Linting"
-    for path in 'core' 'indexes' 'types' 'formats' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util'
+    for path in 'api' 'core' 'indexes' 'types' 'formats' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util'
     do
         echo "linting -> pandas/$path"
         flake8 pandas/$path --filename '*.py'
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index fb09f99f2a7fe..bef02a06135de 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -10,6 +10,7 @@ users upgrade to this version.
 Highlights include:

 - :func:`merge_asof` for asof-style time-series joining, see :ref:`here `
+- pandas development API, see :ref:`here `

 .. contents:: What's new in v0.18.2
     :local:

@@ -20,6 +21,25 @@ Highlights include:
 New features
 ~~~~~~~~~~~~

+.. _whatsnew_0190.dev_api:
+
+pandas development API
+^^^^^^^^^^^^^^^^^^^^^^
+
+As part of making the pandas API more uniform and accessible in the future, we have created a standard
+sub-package of pandas, ``pandas.api``, to hold public APIs. We are starting by exposing type
+introspection functions in ``pandas.api.types``. More sub-packages and officially sanctioned APIs
+will be published in future versions of pandas.
+
+The following are now part of this API:
+
+.. ipython:: python
+
+   import pprint
+   from pandas.api import types
+   funcs = [ f for f in dir(types) if not f.startswith('_') ]
+   pprint.pprint(funcs)
+
 .. _whatsnew_0190.enhancements.asof_merge:

 :func:`merge_asof` for asof-style time-series joining
@@ -227,7 +247,7 @@ Other enhancements
 - Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`)
 - The ``DataFrame`` constructor will now respect key ordering if a list of ``OrderedDict`` objects are passed in (:issue:`13304`)
 - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`)
-- A top-level function :func:`union_categorical` has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`)
+- A function :func:`union_categorical` has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`)
 - ``Series`` has gained the properties ``.is_monotonic``, ``.is_monotonic_increasing``, ``.is_monotonic_decreasing``, similar to ``Index`` (:issue:`13336`)

 .. _whatsnew_0190.api:
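A minimal sketch of how the new namespace is meant to be consumed downstream, assuming a build of this branch is installed (every name used here is among those whitelisted in the test file added below):

    from pandas.api import types

    # type introspection without reaching into private pandas modules
    types.is_scalar(1.5)                         # True
    types.is_list_like([1, 2, 3])                # True
    types.is_integer_dtype('int64')              # True
    types.is_datetime64_dtype('datetime64[ns]')  # True

The same introspectors remain importable from ``pandas.core.common`` for now; the shim added to that module later in this diff keeps the old paths working behind a ``FutureWarning``.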
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 350898c9925e7..2d91c97144e3c 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -16,7 +16,7 @@
 if missing_dependencies:
     raise ImportError("Missing required dependencies {0}".format(missing_dependencies))
-
+del hard_dependencies, dependency, missing_dependencies

 # numpy compat
 from pandas.compat.numpy import *
diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py
new file mode 100644
index 0000000000000..fcbf42f6dabc4
--- /dev/null
+++ b/pandas/api/__init__.py
@@ -0,0 +1 @@
+""" public toolkit API """
diff --git a/pandas/api/tests/__init__.py b/pandas/api/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py
new file mode 100644
index 0000000000000..3f6c97441d659
--- /dev/null
+++ b/pandas/api/tests/test_api.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+
+import pandas as pd
+from pandas.core import common as com
+from pandas import api
+from pandas.api import types
+from pandas.util import testing as tm
+
+_multiprocess_can_split_ = True
+
+
+class Base(object):
+
+    def check(self, namespace, expected, ignored=None):
+        # see which names are in the namespace, minus optional
+        # ignored ones
+        # compare vs the expected
+
+        result = sorted([f for f in dir(namespace) if not f.startswith('_')])
+        if ignored is not None:
+            result = sorted(list(set(result) - set(ignored)))
+
+        expected = sorted(expected)
+        tm.assert_almost_equal(result, expected)
+
+
+class TestPDApi(Base, tm.TestCase):
+
+    # these are optionally imported based on testing
+    # & need to be ignored
+    ignored = ['tests', 'rpy', 'sandbox', 'locale']
+
+    # top-level sub-packages
+    lib = ['api', 'compat', 'computation', 'core',
+           'indexes', 'formats', 'pandas',
+           'test', 'tools', 'tseries',
+           'types', 'util', 'options', 'io']
+
+    # top-level packages that are c-imports, should rename to _*
+    # to avoid naming conflicts
+    lib_to_rename = ['algos', 'hashtable', 'tslib', 'msgpack', 'sparse',
+                     'json', 'lib', 'index', 'parser']
+
+    # these are already deprecated; awaiting removal
+    deprecated_modules = ['ols', 'stats']
+
+    # misc
+    misc = ['IndexSlice', 'NaT']
+
+    # top-level classes
+    classes = ['Categorical', 'CategoricalIndex', 'DataFrame', 'DateOffset',
+               'DatetimeIndex', 'ExcelFile', 'ExcelWriter', 'Float64Index',
+               'Grouper', 'HDFStore', 'Index', 'Int64Index', 'MultiIndex',
+               'Period', 'PeriodIndex', 'RangeIndex',
+               'Series', 'SparseArray', 'SparseDataFrame',
+               'SparseSeries', 'TimeGrouper', 'Timedelta',
+               'TimedeltaIndex', 'Timestamp']
+
+    # these are already deprecated; awaiting removal
+    deprecated_classes = ['SparsePanel', 'TimeSeries', 'WidePanel',
+                          'SparseTimeSeries']
+
+    # these should be deprecated in the future
+    deprecated_classes_in_future = ['Panel', 'Panel4D',
+                                    'SparseList', 'Term']
+
+    # these should be removed from top-level namespace
+    remove_classes_from_top_level_namespace = ['Expr']
+
+    # external modules exposed in pandas namespace
+    modules = ['np', 'datetime', 'datetools']
+
+    # top-level functions
+    funcs = ['bdate_range', 'concat', 'crosstab', 'cut',
+             'date_range', 'eval',
+             'factorize', 'get_dummies', 'get_store',
+             'infer_freq', 'isnull', 'lreshape',
+             'match', 'melt', 'notnull', 'offsets',
+             'merge', 'merge_ordered', 'merge_asof',
+             'period_range',
+             'pivot', 'pivot_table', 'plot_params', 'qcut',
+             'scatter_matrix',
+             'show_versions', 'timedelta_range', 'unique',
+             'value_counts', 'wide_to_long']
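The whitelists above are simply diffed against the live namespace by ``Base.check``. A rough standalone equivalent of that comparison, shown only for illustration (the helper name is hypothetical and not part of the patch):

    import pandas as pd

    def public_names(namespace):
        # mirror Base.check: the public surface is every attribute
        # whose name does not start with an underscore
        return sorted(n for n in dir(namespace) if not n.startswith('_'))

    # any name present here but absent from the whitelists would signal
    # an unintended change to the top-level pandas API surface
    print(public_names(pd)[:10])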
+
+    # top-level option funcs
+    funcs_option = ['reset_option', 'describe_option', 'get_option',
+                    'option_context', 'set_option',
+                    'set_eng_float_format']
+
+    # top-level read_* funcs
+    funcs_read = ['read_clipboard', 'read_csv', 'read_excel', 'read_fwf',
+                  'read_gbq', 'read_hdf', 'read_html', 'read_json',
+                  'read_msgpack', 'read_pickle', 'read_sas', 'read_sql',
+                  'read_sql_query', 'read_sql_table', 'read_stata',
+                  'read_table']
+
+    # top-level to_* funcs
+    funcs_to = ['to_datetime', 'to_msgpack',
+                'to_numeric', 'to_pickle', 'to_timedelta']
+
+    # these should be deprecated in the future
+    deprecated_funcs_in_future = ['pnow', 'groupby', 'info']
+
+    # these are already deprecated; awaiting removal
+    deprecated_funcs = ['ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar',
+                        'ewmvol', 'expanding_apply', 'expanding_corr',
+                        'expanding_count', 'expanding_cov', 'expanding_kurt',
+                        'expanding_max', 'expanding_mean', 'expanding_median',
+                        'expanding_min', 'expanding_quantile',
+                        'expanding_skew', 'expanding_std', 'expanding_sum',
+                        'expanding_var', 'fama_macbeth', 'rolling_apply',
+                        'rolling_corr', 'rolling_count', 'rolling_cov',
+                        'rolling_kurt', 'rolling_max', 'rolling_mean',
+                        'rolling_median', 'rolling_min', 'rolling_quantile',
+                        'rolling_skew', 'rolling_std', 'rolling_sum',
+                        'rolling_var', 'rolling_window', 'ordered_merge']
+
+    def test_api(self):
+
+        self.check(pd,
+                   self.lib + self.lib_to_rename + self.misc +
+                   self.modules + self.deprecated_modules +
+                   self.classes + self.deprecated_classes +
+                   self.deprecated_classes_in_future +
+                   self.remove_classes_from_top_level_namespace +
+                   self.funcs + self.funcs_option +
+                   self.funcs_read + self.funcs_to +
+                   self.deprecated_funcs +
+                   self.deprecated_funcs_in_future,
+                   self.ignored)
+
+
+class TestApi(Base, tm.TestCase):
+
+    allowed = ['tests', 'types']
+
+    def test_api(self):
+
+        self.check(api, self.allowed)
+
+
+class TestTypes(Base, tm.TestCase):
+
+    allowed = ['is_any_int_dtype', 'is_bool', 'is_bool_dtype',
+               'is_categorical', 'is_categorical_dtype', 'is_complex',
+               'is_complex_dtype', 'is_datetime64_any_dtype',
+               'is_datetime64_dtype', 'is_datetime64_ns_dtype',
+               'is_datetime64tz_dtype', 'is_datetimetz', 'is_dtype_equal',
+               'is_extension_type', 'is_float', 'is_float_dtype',
+               'is_floating_dtype', 'is_int64_dtype', 'is_integer',
+               'is_integer_dtype', 'is_number', 'is_numeric_dtype',
+               'is_object_dtype', 'is_scalar', 'is_sparse',
+               'is_string_dtype', 'is_timedelta64_dtype',
+               'is_timedelta64_ns_dtype',
+               'is_re', 'is_re_compilable',
+               'is_dict_like', 'is_iterator',
+               'is_list_like', 'is_hashable',
+               'is_named_tuple', 'is_sequence',
+               'pandas_dtype']
+
+    def test_types(self):
+
+        self.check(types, self.allowed)
+
+    def check_deprecation(self, fold, fnew):
+        with tm.assert_produces_warning(FutureWarning):
+            try:
+                result = fold('foo')
+                expected = fnew('foo')
+                self.assertEqual(result, expected)
+            except TypeError:
+                self.assertRaises(TypeError,
+                                  lambda: fnew('foo'))
+            except AttributeError:
+                self.assertRaises(AttributeError,
+                                  lambda: fnew('foo'))
+
+    def test_deprecation_core_common(self):
+
+        # test that we are in fact deprecating
+        # the pandas.core.common introspectors
+        for t in self.allowed:
+            self.check_deprecation(getattr(com, t), getattr(types, t))
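The deprecation being exercised here comes from the shim added to ``pandas/core/common.py`` later in this diff; a minimal interactive check of that behaviour (a hypothetical session on this branch, not part of the patch) would be:

    import warnings
    import pandas.core.common as com

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        com.is_list_like([1, 2])  # still works, returns True
    # the wrapper should have emitted a FutureWarning pointing users
    # at pandas.api.types.is_list_like
    assert any(issubclass(x.category, FutureWarning) for x in w)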
+
+    def test_deprecation_core_common_moved(self):
+
+        # these are in pandas.types.common
+        l = ['is_datetime_arraylike',
+             'is_datetime_or_timedelta_dtype',
+             'is_datetimelike',
+             'is_datetimelike_v_numeric',
+             'is_datetimelike_v_object',
+             'is_datetimetz',
+             'is_int_or_datetime_dtype',
+             'is_period_arraylike',
+             'is_string_like',
+             'is_string_like_dtype']
+
+        from pandas.types import common as c
+        for t in l:
+            self.check_deprecation(getattr(com, t), getattr(c, t))
+
+    def test_removed_from_core_common(self):
+
+        for t in ['is_null_datelike_scalar',
+                  'ensure_float']:
+            self.assertRaises(AttributeError, lambda: getattr(com, t))
+
+if __name__ == '__main__':
+    import nose
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                   exit=False)
diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py
new file mode 100644
index 0000000000000..ee217543f0420
--- /dev/null
+++ b/pandas/api/types/__init__.py
@@ -0,0 +1,4 @@
+""" public toolkit API """
+
+from pandas.types.api import * # noqa
+del np # noqa
diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py
index 15bf6d31b7109..adc17c7514832 100644
--- a/pandas/compat/numpy/function.py
+++ b/pandas/compat/numpy/function.py
@@ -21,7 +21,8 @@
 from numpy import ndarray
 from pandas.util.validators import (validate_args, validate_kwargs,
                                     validate_args_and_kwargs)
-from pandas.core.common import is_bool, is_integer, UnsupportedFunctionCall
+from pandas.core.common import UnsupportedFunctionCall
+from pandas.types.common import is_integer, is_bool
 from pandas.compat import OrderedDict
diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py
index 7a0743f6b2778..96a04cff9372e 100644
--- a/pandas/computation/ops.py
+++ b/pandas/computation/ops.py
@@ -7,11 +7,11 @@

 import numpy as np

+from pandas.types.common import is_list_like, is_scalar
 import pandas as pd
 from pandas.compat import PY3, string_types, text_type
 import pandas.core.common as com
 from pandas.formats.printing import pprint_thing, pprint_thing_encoded
-import pandas.lib as lib
 from pandas.core.base import StringMixin
 from pandas.computation.common import _ensure_decoded, _result_type_many
 from pandas.computation.scope import _DEFAULT_GLOBALS
@@ -100,7 +100,7 @@ def update(self, value):

     @property
     def isscalar(self):
-        return lib.isscalar(self._value)
+        return is_scalar(self._value)

     @property
     def type(self):
@@ -229,7 +229,7 @@ def _in(x, y):
     try:
         return x.isin(y)
     except AttributeError:
-        if com.is_list_like(x):
+        if is_list_like(x):
             try:
                 return y.isin(x)
             except AttributeError:
@@ -244,7 +244,7 @@ def _not_in(x, y):
     try:
         return ~x.isin(y)
     except AttributeError:
-        if com.is_list_like(x):
+        if is_list_like(x):
             try:
                 return ~y.isin(x)
             except AttributeError:
diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py
index d6d55d15fec30..e375716b0d606 100644
--- a/pandas/computation/pytables.py
+++ b/pandas/computation/pytables.py
@@ -7,6 +7,8 @@
 from datetime import datetime, timedelta
 import numpy as np
 import pandas as pd
+
+from pandas.types.common import is_list_like
 import pandas.core.common as com
 from pandas.compat import u, string_types, DeepChainMap
 from pandas.core.base import StringMixin
@@ -127,7 +129,7 @@ def pr(left, right):

     def conform(self, rhs):
         """ inplace conform rhs """
-        if not com.is_list_like(rhs):
+        if not is_list_like(rhs):
             rhs = [rhs]
         if isinstance(rhs, np.ndarray):
             rhs = rhs.ravel()
diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
index 5019dd392a567..066df0521fef6 100644
--- a/pandas/computation/tests/test_eval.py
+++ b/pandas/computation/tests/test_eval.py
@@ -13,6 +13,7 @@
 from numpy.random import randn, rand, randint
 import numpy as np

+from pandas.types.common import is_list_like, is_scalar
 import pandas as pd
 from
pandas.core import common as com from pandas import DataFrame, Series, Panel, date_range @@ -200,7 +201,7 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): ex = '(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)'.format(cmp1=cmp1, binop=binop, cmp2=cmp2) - scalar_with_in_notin = (lib.isscalar(rhs) and (cmp1 in skip_these or + scalar_with_in_notin = (is_scalar(rhs) and (cmp1 in skip_these or cmp2 in skip_these)) if scalar_with_in_notin: with tm.assertRaises(TypeError): @@ -253,7 +254,7 @@ def check_operands(left, right, cmp_op): def check_simple_cmp_op(self, lhs, cmp1, rhs): ex = 'lhs {0} rhs'.format(cmp1) - if cmp1 in ('in', 'not in') and not com.is_list_like(rhs): + if cmp1 in ('in', 'not in') and not is_list_like(rhs): self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, parser=self.parser, local_dict={'lhs': lhs, 'rhs': rhs}) @@ -331,7 +332,7 @@ def check_pow(self, lhs, arith1, rhs): expected = self.get_expected_pow_result(lhs, rhs) result = pd.eval(ex, engine=self.engine, parser=self.parser) - if (lib.isscalar(lhs) and lib.isscalar(rhs) and + if (is_scalar(lhs) and is_scalar(rhs) and _is_py3_complex_incompat(result, expected)): self.assertRaises(AssertionError, tm.assert_numpy_array_equal, result, expected) @@ -364,16 +365,16 @@ def check_compound_invert_op(self, lhs, cmp1, rhs): skip_these = 'in', 'not in' ex = '~(lhs {0} rhs)'.format(cmp1) - if lib.isscalar(rhs) and cmp1 in skip_these: + if is_scalar(rhs) and cmp1 in skip_these: self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, parser=self.parser, local_dict={'lhs': lhs, 'rhs': rhs}) else: # compound - if lib.isscalar(lhs) and lib.isscalar(rhs): + if is_scalar(lhs) and is_scalar(rhs): lhs, rhs = map(lambda x: np.array([x]), (lhs, rhs)) expected = _eval_single_bin(lhs, cmp1, rhs, self.engine) - if lib.isscalar(expected): + if is_scalar(expected): expected = not expected else: expected = ~expected @@ -643,17 +644,17 @@ def test_identical(self): x = 1 result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, 1) - self.assertTrue(lib.isscalar(result)) + self.assertTrue(is_scalar(result)) x = 1.5 result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, 1.5) - self.assertTrue(lib.isscalar(result)) + self.assertTrue(is_scalar(result)) x = False result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, False) - self.assertTrue(lib.isscalar(result)) + self.assertTrue(is_scalar(result)) x = np.array([1]) result = pd.eval('x', engine=self.engine, parser=self.parser) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4b40bce79cbb5..c3ba734353a8d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -7,10 +7,31 @@ import numpy as np from pandas import compat, lib, tslib, _np_version_under1p8 +from pandas.types.cast import _maybe_promote +from pandas.types.generic import ABCPeriodIndex, ABCDatetimeIndex +from pandas.types.common import (is_integer_dtype, + is_int64_dtype, + is_categorical_dtype, + is_extension_type, + is_datetimetz, + is_period_arraylike, + is_datetime_or_timedelta_dtype, + is_float_dtype, + needs_i8_conversion, + is_categorical, + is_datetime64_dtype, + is_timedelta64_dtype, + is_scalar, + _ensure_platform_int, + _ensure_object, + _ensure_float64, + _ensure_int64, + is_list_like) +from pandas.types.missing import isnull + import pandas.core.common as com import pandas.algos as algos import pandas.hashtable as htable -from pandas.types import api as gt from pandas.compat 
import string_types from pandas.tslib import iNaT @@ -105,12 +126,12 @@ def isin(comps, values): boolean array same length as comps """ - if not com.is_list_like(comps): + if not is_list_like(comps): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(comps).__name__)) comps = np.asarray(comps) - if not com.is_list_like(values): + if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(values).__name__)) @@ -126,15 +147,15 @@ def isin(comps, values): f = lambda x, y: lib.ismember_int64(x, set(y)) # may need i8 conversion for proper membership testing - if com.is_datetime64_dtype(comps): + if is_datetime64_dtype(comps): from pandas.tseries.tools import to_datetime values = to_datetime(values)._values.view('i8') comps = comps.view('i8') - elif com.is_timedelta64_dtype(comps): + elif is_timedelta64_dtype(comps): from pandas.tseries.timedeltas import to_timedelta values = to_timedelta(values)._values.view('i8') comps = comps.view('i8') - elif com.is_int64_dtype(comps): + elif is_int64_dtype(comps): pass else: f = lambda x, y: lib.ismember(x, set(values)) @@ -171,20 +192,20 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): vals = np.asarray(values) # localize to UTC - is_datetimetz = com.is_datetimetz(values) - if is_datetimetz: + is_datetimetz_type = is_datetimetz(values) + if is_datetimetz_type: values = DatetimeIndex(values) vals = values.tz_localize(None) - is_datetime = com.is_datetime64_dtype(vals) - is_timedelta = com.is_timedelta64_dtype(vals) + is_datetime = is_datetime64_dtype(vals) + is_timedelta = is_timedelta64_dtype(vals) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(size_hint or len(vals)) uniques = vec_klass() labels = table.get_labels(vals, uniques, 0, na_sentinel, True) - labels = com._ensure_platform_int(labels) + labels = _ensure_platform_int(labels) uniques = uniques.to_array() @@ -194,7 +215,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): except: # unorderable in py3 if mixed str/int t = hash_klass(len(uniques)) - t.map_locations(com._ensure_object(uniques)) + t.map_locations(_ensure_object(uniques)) # order ints before strings ordered = np.concatenate([ @@ -202,8 +223,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): dtype=object)) for f in [lambda x: not isinstance(x, string_types), lambda x: isinstance(x, string_types)]]) - sorter = com._ensure_platform_int(t.lookup( - com._ensure_object(ordered))) + sorter = _ensure_platform_int(t.lookup( + _ensure_object(ordered))) reverse_indexer = np.empty(len(sorter), dtype=np.int_) reverse_indexer.put(sorter, np.arange(len(sorter))) @@ -214,7 +235,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): uniques = uniques.take(sorter) - if is_datetimetz: + if is_datetimetz_type: # reset tz uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize( @@ -267,7 +288,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, raise TypeError("bins argument only works with numeric data.") values = cat.codes - if com.is_extension_type(values) and not com.is_datetimetz(values): + if is_extension_type(values) and not is_datetimetz(values): # handle Categorical and sparse, # datetime tz can be handeled in ndarray path result = Series(values).values.value_counts(dropna=dropna) @@ -298,9 +319,9 @@ def 
value_counts(values, sort=True, ascending=False, normalize=False, def _value_counts_arraylike(values, dropna=True): - is_datetimetz = com.is_datetimetz(values) - is_period = (isinstance(values, gt.ABCPeriodIndex) or - com.is_period_arraylike(values)) + is_datetimetz_type = is_datetimetz(values) + is_period = (isinstance(values, ABCPeriodIndex) or + is_period_arraylike(values)) orig = values @@ -308,7 +329,7 @@ def _value_counts_arraylike(values, dropna=True): values = Series(values).values dtype = values.dtype - if com.is_datetime_or_timedelta_dtype(dtype) or is_period: + if is_datetime_or_timedelta_dtype(dtype) or is_period: from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex @@ -327,8 +348,8 @@ def _value_counts_arraylike(values, dropna=True): keys = keys.astype(dtype) # dtype handling - if is_datetimetz: - if isinstance(orig, gt.ABCDatetimeIndex): + if is_datetimetz_type: + if isinstance(orig, ABCDatetimeIndex): tz = orig.tz else: tz = orig.dt.tz @@ -336,15 +357,15 @@ def _value_counts_arraylike(values, dropna=True): if is_period: keys = PeriodIndex._simple_new(keys, freq=freq) - elif com.is_integer_dtype(dtype): - values = com._ensure_int64(values) + elif is_integer_dtype(dtype): + values = _ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) - elif com.is_float_dtype(dtype): - values = com._ensure_float64(values) + elif is_float_dtype(dtype): + values = _ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: - values = com._ensure_object(values) - mask = com.isnull(values) + values = _ensure_object(values) + mask = isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) @@ -366,8 +387,8 @@ def mode(values): constructor = Series dtype = values.dtype - if com.is_integer_dtype(values): - values = com._ensure_int64(values) + if is_integer_dtype(values): + values = _ensure_int64(values) result = constructor(sorted(htable.mode_int64(values)), dtype=dtype) elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): @@ -375,11 +396,11 @@ def mode(values): values = values.view(np.int64) result = constructor(sorted(htable.mode_int64(values)), dtype=dtype) - elif com.is_categorical_dtype(values): + elif is_categorical_dtype(values): result = constructor(values.mode()) else: - mask = com.isnull(values) - values = com._ensure_object(values) + mask = isnull(values) + values = _ensure_object(values) res = htable.mode_object(values, mask) try: res = sorted(res) @@ -459,7 +480,7 @@ def quantile(x, q, interpolation_method='fraction'): """ x = np.asarray(x) - mask = com.isnull(x) + mask = isnull(x) x = x[~mask] @@ -486,7 +507,7 @@ def _get_score(at): return score - if lib.isscalar(q): + if is_scalar(q): return _get_score(q) else: q = np.asarray(q, np.float64) @@ -593,18 +614,18 @@ def _hashtable_algo(f, dtype, return_dtype=None): """ f(HashTable, type_caster) -> result """ - if com.is_float_dtype(dtype): - return f(htable.Float64HashTable, com._ensure_float64) - elif com.is_integer_dtype(dtype): - return f(htable.Int64HashTable, com._ensure_int64) - elif com.is_datetime64_dtype(dtype): + if is_float_dtype(dtype): + return f(htable.Float64HashTable, _ensure_float64) + elif is_integer_dtype(dtype): + return f(htable.Int64HashTable, _ensure_int64) + elif is_datetime64_dtype(dtype): return_dtype = return_dtype or 'M8[ns]' - return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype) - elif 
com.is_timedelta64_dtype(dtype): + return f(htable.Int64HashTable, _ensure_int64).view(return_dtype) + elif is_timedelta64_dtype(dtype): return_dtype = return_dtype or 'm8[ns]' - return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype) + return f(htable.Int64HashTable, _ensure_int64).view(return_dtype) else: - return f(htable.PyObjectHashTable, com._ensure_object) + return f(htable.PyObjectHashTable, _ensure_object) _hashtables = { 'float64': (htable.Float64HashTable, htable.Float64Vector), @@ -614,20 +635,20 @@ def _hashtable_algo(f, dtype, return_dtype=None): def _get_data_algo(values, func_map): - if com.is_float_dtype(values): + if is_float_dtype(values): f = func_map['float64'] - values = com._ensure_float64(values) + values = _ensure_float64(values) - elif com.needs_i8_conversion(values): + elif needs_i8_conversion(values): f = func_map['int64'] values = values.view('i8') - elif com.is_integer_dtype(values): + elif is_integer_dtype(values): f = func_map['int64'] - values = com._ensure_int64(values) + values = _ensure_int64(values) else: f = func_map['generic'] - values = com._ensure_object(values) + values = _ensure_object(values) return f, values @@ -689,7 +710,7 @@ def _take_nd_generic(arr, indexer, out, axis, fill_value, mask_info): if arr.dtype != out.dtype: arr = arr.astype(out.dtype) if arr.shape[axis] > 0: - arr.take(com._ensure_platform_int(indexer), axis=axis, out=out) + arr.take(_ensure_platform_int(indexer), axis=axis, out=out) if needs_masking: outindexer = [slice(None)] * arr.ndim outindexer[axis] = mask @@ -830,7 +851,7 @@ def _get_take_nd_function(ndim, arr_dtype, out_dtype, axis=0, mask_info=None): return func def func(arr, indexer, out, fill_value=np.nan): - indexer = com._ensure_int64(indexer) + indexer = _ensure_int64(indexer) _take_nd_generic(arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info) @@ -854,7 +875,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, out : ndarray or None, default None Optional output array, must be appropriate type to hold input and fill_value together, if indexer has any -1 value entries; call - common._maybe_promote to determine this type for any fill_value + _maybe_promote to determine this type for any fill_value fill_value : any, default np.nan Fill value to replace -1 values with mask_info : tuple of (ndarray, boolean) @@ -868,24 +889,24 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, """ # dispatch to internal type takes - if com.is_categorical(arr): + if is_categorical(arr): return arr.take_nd(indexer, fill_value=fill_value, allow_fill=allow_fill) - elif com.is_datetimetz(arr): + elif is_datetimetz(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) dtype, fill_value = arr.dtype, arr.dtype.type() else: - indexer = com._ensure_int64(indexer) + indexer = _ensure_int64(indexer) if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() mask_info = None, False else: # check for promotion based on types only (do this first because # it's faster than computing a mask) - dtype, fill_value = com._maybe_promote(arr.dtype, fill_value) + dtype, fill_value = _maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: @@ -931,7 +952,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, 
func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info) - indexer = com._ensure_int64(indexer) + indexer = _ensure_int64(indexer) func(arr, indexer, out, fill_value) if flip_order: @@ -957,11 +978,11 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, if row_idx is None: row_idx = np.arange(arr.shape[0], dtype=np.int64) else: - row_idx = com._ensure_int64(row_idx) + row_idx = _ensure_int64(row_idx) if col_idx is None: col_idx = np.arange(arr.shape[1], dtype=np.int64) else: - col_idx = com._ensure_int64(col_idx) + col_idx = _ensure_int64(col_idx) indexer = row_idx, col_idx if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() @@ -969,7 +990,7 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, else: # check for promotion based on types only (do this first because # it's faster than computing a mask) - dtype, fill_value = com._maybe_promote(arr.dtype, fill_value) + dtype, fill_value = _maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: @@ -1032,7 +1053,7 @@ def diff(arr, n, axis=0): na = np.nan dtype = arr.dtype is_timedelta = False - if com.needs_i8_conversion(arr): + if needs_i8_conversion(arr): dtype = np.float64 arr = arr.view('i8') na = tslib.iNaT diff --git a/pandas/core/api.py b/pandas/core/api.py index 0a6992bfebd70..579f21eb4ada8 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -5,7 +5,7 @@ import numpy as np from pandas.core.algorithms import factorize, match, unique, value_counts -from pandas.core.common import isnull, notnull +from pandas.types.missing import isnull, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper from pandas.formats.format import set_eng_float_format diff --git a/pandas/core/base.py b/pandas/core/base.py index 13a6b4b7b4ce0..a0dfebdfde356 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,6 +4,12 @@ from pandas import compat from pandas.compat import builtins import numpy as np + +from pandas.types.missing import isnull +from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndex +from pandas.types.common import (_ensure_object, is_object_dtype, + is_list_like, is_scalar) + from pandas.core import common as com import pandas.core.nanops as nanops import pandas.lib as lib @@ -11,7 +17,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError -from pandas.types import api as gt from pandas.formats.printing import pprint_thing _shared_docs = dict() @@ -121,7 +126,7 @@ def __sizeof__(self): """ if hasattr(self, 'memory_usage'): mem = self.memory_usage(deep=True) - if not lib.isscalar(mem): + if not is_scalar(mem): mem = mem.sum() return int(mem) @@ -293,15 +298,15 @@ def name(self): @property def _selection_list(self): - if not isinstance(self._selection, (list, tuple, gt.ABCSeries, - gt.ABCIndex, np.ndarray)): + if not isinstance(self._selection, (list, tuple, ABCSeries, + ABCIndex, np.ndarray)): return [self._selection] return self._selection @cache_readonly def _selected_obj(self): - if self._selection is None or isinstance(self.obj, gt.ABCSeries): + if self._selection is None or isinstance(self.obj, ABCSeries): return self.obj else: return self.obj[self._selection] @@ -313,7 +318,7 @@ def ndim(self): @cache_readonly def _obj_with_exclusions(self): if 
self._selection is not None and isinstance(self.obj, - gt.ABCDataFrame): + ABCDataFrame): return self.obj.reindex(columns=self._selection_list) if len(self.exclusions) > 0: @@ -325,7 +330,7 @@ def __getitem__(self, key): if self._selection is not None: raise Exception('Column(s) %s already selected' % self._selection) - if isinstance(key, (list, tuple, gt.ABCSeries, gt.ABCIndex, + if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)): if len(self.obj.columns.intersection(key)) != len(key): bad_keys = list(set(key).difference(self.obj.columns)) @@ -553,7 +558,7 @@ def _agg(arg, func): if isinstance(result, list): result = concat(result, keys=keys, axis=1) elif isinstance(list(compat.itervalues(result))[0], - gt.ABCDataFrame): + ABCDataFrame): result = concat([result[k] for k in keys], keys=keys, axis=1) else: from pandas import DataFrame @@ -682,7 +687,7 @@ def _gotitem(self, key, ndim, subset=None): **kwargs) self._reset_cache() if subset.ndim == 2: - if lib.isscalar(key) and key in subset or com.is_list_like(key): + if is_scalar(key) and key in subset or is_list_like(key): self._selection = key return self @@ -903,7 +908,7 @@ def argmin(self, axis=None): @cache_readonly def hasnans(self): """ return if I have any nans; enables various perf speedups """ - return com.isnull(self).any() + return isnull(self).any() def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): @@ -980,7 +985,7 @@ def nunique(self, dropna=True): """ uniqs = self.unique() n = len(uniqs) - if dropna and com.isnull(uniqs).any(): + if dropna and isnull(uniqs).any(): n -= 1 return n @@ -1053,7 +1058,7 @@ def memory_usage(self, deep=False): return self.values.memory_usage(deep=deep) v = self.values.nbytes - if deep and com.is_object_dtype(self): + if deep and is_object_dtype(self): v += lib.memory_usage_of_objects(self.values) return v @@ -1195,7 +1200,7 @@ def drop_duplicates(self, keep='first', inplace=False): False: 'first'}) @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) def duplicated(self, keep='first'): - keys = com._values_from_object(com._ensure_object(self.values)) + keys = com._values_from_object(_ensure_object(self.values)) duplicated = lib.duplicated(keys, keep=keep) try: return self._constructor(duplicated, diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index f4aeaf9184d09..79d8bfbf57f12 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -7,6 +7,22 @@ from pandas import compat, lib from pandas.compat import u +from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex +from pandas.types.missing import isnull, notnull +from pandas.types.cast import (_possibly_infer_to_datetimelike, + _coerce_indexer_dtype) +from pandas.types.dtypes import CategoricalDtype +from pandas.types.common import (_ensure_int64, + _ensure_object, + _ensure_platform_int, + is_dtype_equal, + is_datetimelike, + is_categorical_dtype, + is_integer_dtype, is_bool, + is_list_like, is_sequence, + is_scalar) +from pandas.core.common import is_null_slice + from pandas.core.algorithms import factorize, take_1d from pandas.core.base import (PandasObject, PandasDelegate, NoNewAttributesMixin, _shared_docs) @@ -16,13 +32,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) -from pandas.core.common import ( - ABCSeries, ABCIndexClass, ABCCategoricalIndex, isnull, notnull, - is_dtype_equal, is_categorical_dtype, is_integer_dtype, - _possibly_infer_to_datetimelike, 
is_list_like, - is_sequence, is_null_slice, is_bool, _ensure_object, _ensure_int64, - _coerce_indexer_dtype) -from pandas.types.api import CategoricalDtype from pandas.util.terminal import get_terminal_size from pandas.core.config import get_option @@ -64,7 +73,7 @@ def f(self, other): # With cat[0], for example, being ``np.int64(1)`` by the time it gets # into this function would become ``np.array(1)``. other = lib.item_from_zerodim(other) - if lib.isscalar(other): + if is_scalar(other): if other in self.categories: i = self.categories.get_loc(other) return getattr(self._codes, op)(i) @@ -968,7 +977,7 @@ def shift(self, periods): if codes.ndim > 1: raise NotImplementedError("Categorical with ndim > 1.") if np.prod(codes.shape) and (periods != 0): - codes = np.roll(codes, com._ensure_platform_int(periods), axis=0) + codes = np.roll(codes, _ensure_platform_int(periods), axis=0) if periods > 0: codes[:periods] = -1 else: @@ -1148,7 +1157,7 @@ def value_counts(self, dropna=True): counts : Series """ from numpy import bincount - from pandas.core.common import isnull + from pandas.types.missing import isnull from pandas.core.series import Series from pandas.core.index import CategoricalIndex @@ -1182,7 +1191,7 @@ def get_values(self): Index if datetime / periods """ # if we are a datetime and period index, return Index to keep metadata - if com.is_datetimelike(self.categories): + if is_datetimelike(self.categories): return self.categories.take(self._codes, fill_value=np.nan) return np.array(self) @@ -1933,7 +1942,7 @@ def _convert_to_list_like(list_like): if (is_sequence(list_like) or isinstance(list_like, tuple) or isinstance(list_like, types.GeneratorType)): return list(list_like) - elif lib.isscalar(list_like): + elif is_scalar(list_like): return [list_like] else: # is this reached? diff --git a/pandas/core/common.py b/pandas/core/common.py index 28bae362a3411..99dd2e9f5b8a9 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2,23 +2,66 @@ Misc tools for implementing data structures """ -import re -import collections -import numbers +import sys +import warnings from datetime import datetime, timedelta from functools import partial import numpy as np -import pandas as pd -import pandas.algos as algos import pandas.lib as lib import pandas.tslib as tslib from pandas import compat -from pandas.compat import (long, zip, map, string_types, - iteritems) -from pandas.types import api as gt -from pandas.types.api import * # noqa +from pandas.compat import long, zip, iteritems from pandas.core.config import get_option +from pandas.types.generic import ABCSeries +from pandas.types.common import _NS_DTYPE, is_integer +from pandas.types.inference import _iterable_not_string +from pandas.types.missing import isnull +from pandas.api import types +from pandas.types import common + +# back-compat of public API +# deprecate these functions +m = sys.modules['pandas.core.common'] +for t in [t for t in dir(types) if not t.startswith('_')]: + + def outer(t=t): + + def wrapper(*args, **kwargs): + warnings.warn("pandas.core.common.{t} is deprecated. 
" + "import from the public API: " + "pandas.api.types.{t} instead".format(t=t), + FutureWarning, stacklevel=2) + return getattr(types, t)(*args, **kwargs) + return wrapper + + setattr(m, t, outer(t)) + +# back-compat for non-public functions +# deprecate these functions +for t in ['is_datetime_arraylike', + 'is_datetime_or_timedelta_dtype', + 'is_datetimelike', + 'is_datetimelike_v_numeric', + 'is_datetimelike_v_object', + 'is_datetimetz', + 'is_int_or_datetime_dtype', + 'is_period_arraylike', + 'is_string_like', + 'is_string_like_dtype']: + + def outer(t=t): + + def wrapper(*args, **kwargs): + warnings.warn("pandas.core.common.{t} is deprecated. " + "These are not longer public API functions, " + "but can be imported from " + "pandas.types.common.{t} instead".format(t=t), + FutureWarning, stacklevel=2) + return getattr(common, t)(*args, **kwargs) + return wrapper + + setattr(m, t, outer(t)) class PandasError(Exception): @@ -58,322 +101,6 @@ def __str__(self): self.class_instance.__class__.__name__) -_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name - for t in ['O', 'int8', 'uint8', 'int16', 'uint16', - 'int32', 'uint32', 'int64', 'uint64']]) - -_NS_DTYPE = np.dtype('M8[ns]') -_TD_DTYPE = np.dtype('m8[ns]') -_INT64_DTYPE = np.dtype(np.int64) -_DATELIKE_DTYPES = set([np.dtype(t) - for t in ['M8[ns]', 'M8[ns]', - 'm8[ns]', 'm8[ns]']]) -_int8_max = np.iinfo(np.int8).max -_int16_max = np.iinfo(np.int16).max -_int32_max = np.iinfo(np.int32).max -_int64_max = np.iinfo(np.int64).max - - -def isnull(obj): - """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) - - Parameters - ---------- - arr : ndarray or object value - Object to check for null-ness - - Returns - ------- - isnulled : array-like of bool or bool - Array or bool indicating whether an object is null or if an array is - given which of the element is null. - - See also - -------- - pandas.notnull: boolean inverse of pandas.isnull - """ - return _isnull(obj) - - -def _isnull_new(obj): - if lib.isscalar(obj): - return lib.checknull(obj) - # hack (for now) because MI registers as ndarray - elif isinstance(obj, pd.MultiIndex): - raise NotImplementedError("isnull is not defined for MultiIndex") - elif isinstance(obj, (gt.ABCSeries, np.ndarray, pd.Index)): - return _isnull_ndarraylike(obj) - elif isinstance(obj, gt.ABCGeneric): - return obj._constructor(obj._data.isnull(func=isnull)) - elif isinstance(obj, list) or hasattr(obj, '__array__'): - return _isnull_ndarraylike(np.asarray(obj)) - else: - return obj is None - - -def _isnull_old(obj): - """Detect missing values. Treat None, NaN, INF, -INF as null. - - Parameters - ---------- - arr: ndarray or object value - - Returns - ------- - boolean ndarray or boolean - """ - if lib.isscalar(obj): - return lib.checknull_old(obj) - # hack (for now) because MI registers as ndarray - elif isinstance(obj, pd.MultiIndex): - raise NotImplementedError("isnull is not defined for MultiIndex") - elif isinstance(obj, (gt.ABCSeries, np.ndarray, pd.Index)): - return _isnull_ndarraylike_old(obj) - elif isinstance(obj, gt.ABCGeneric): - return obj._constructor(obj._data.isnull(func=_isnull_old)) - elif isinstance(obj, list) or hasattr(obj, '__array__'): - return _isnull_ndarraylike_old(np.asarray(obj)) - else: - return obj is None - - -_isnull = _isnull_new - - -def _use_inf_as_null(key): - """Option change callback for null/inf behaviour - Choose which replacement for numpy.isnan / ~numpy.isfinite is used. 
- - Parameters - ---------- - flag: bool - True means treat None, NaN, INF, -INF as null (old way), - False means None and NaN are null, but INF, -INF are not null - (new way). - - Notes - ----- - This approach to setting global module values is discussed and - approved here: - - * http://stackoverflow.com/questions/4859217/ - programmatically-creating-variables-in-python/4859312#4859312 - """ - flag = get_option(key) - if flag: - globals()['_isnull'] = _isnull_old - else: - globals()['_isnull'] = _isnull_new - - -def _isnull_ndarraylike(obj): - - values = getattr(obj, 'values', obj) - dtype = values.dtype - - if is_string_dtype(dtype): - if is_categorical_dtype(values): - from pandas import Categorical - if not isinstance(values, Categorical): - values = values.values - result = values.isnull() - else: - - # Working around NumPy ticket 1542 - shape = values.shape - - if is_string_like_dtype(dtype): - result = np.zeros(values.shape, dtype=bool) - else: - result = np.empty(shape, dtype=bool) - vec = lib.isnullobj(values.ravel()) - result[...] = vec.reshape(shape) - - elif is_datetimelike(obj): - # this is the NaT pattern - result = values.view('i8') == tslib.iNaT - else: - result = np.isnan(values) - - # box - if isinstance(obj, gt.ABCSeries): - from pandas import Series - result = Series(result, index=obj.index, name=obj.name, copy=False) - - return result - - -def _isnull_ndarraylike_old(obj): - values = getattr(obj, 'values', obj) - dtype = values.dtype - - if is_string_dtype(dtype): - # Working around NumPy ticket 1542 - shape = values.shape - - if is_string_like_dtype(dtype): - result = np.zeros(values.shape, dtype=bool) - else: - result = np.empty(shape, dtype=bool) - vec = lib.isnullobj_old(values.ravel()) - result[:] = vec.reshape(shape) - - elif dtype in _DATELIKE_DTYPES: - # this is the NaT pattern - result = values.view('i8') == tslib.iNaT - else: - result = ~np.isfinite(values) - - # box - if isinstance(obj, gt.ABCSeries): - from pandas import Series - result = Series(result, index=obj.index, name=obj.name, copy=False) - - return result - - -def notnull(obj): - """Replacement for numpy.isfinite / ~numpy.isnan which is suitable for use - on object arrays. - - Parameters - ---------- - arr : ndarray or object value - Object to check for *not*-null-ness - - Returns - ------- - isnulled : array-like of bool or bool - Array or bool indicating whether an object is *not* null or if an array - is given which of the element is *not* null. - - See also - -------- - pandas.isnull : boolean inverse of pandas.notnull - """ - res = isnull(obj) - if lib.isscalar(res): - return not res - return ~res - - -def is_null_datelike_scalar(other): - """ test whether the object is a null datelike, e.g. Nat - but guard against passing a non-scalar """ - if other is pd.NaT or other is None: - return True - elif lib.isscalar(other): - - # a timedelta - if hasattr(other, 'dtype'): - return other.view('i8') == tslib.iNaT - elif is_integer(other) and other == tslib.iNaT: - return True - return isnull(other) - return False - - -def array_equivalent(left, right, strict_nan=False): - """ - True if two arrays, left and right, have equal non-NaN elements, and NaNs - in corresponding locations. False otherwise. It is assumed that left and - right are NumPy arrays of the same dtype. The behavior of this function - (particularly with respect to NaNs) is not defined if the dtypes are - different. 
- - Parameters - ---------- - left, right : ndarrays - strict_nan : bool, default False - If True, consider NaN and None to be different. - - Returns - ------- - b : bool - Returns True if the arrays are equivalent. - - Examples - -------- - >>> array_equivalent( - ... np.array([1, 2, np.nan]), - ... np.array([1, 2, np.nan])) - True - >>> array_equivalent( - ... np.array([1, np.nan, 2]), - ... np.array([1, 2, np.nan])) - False - """ - - left, right = np.asarray(left), np.asarray(right) - - # shape compat - if left.shape != right.shape: - return False - - # Object arrays can contain None, NaN and NaT. - # string dtypes must be come to this path for NumPy 1.7.1 compat - if is_string_dtype(left) or is_string_dtype(right): - - if not strict_nan: - # pd.isnull considers NaN and None to be equivalent. - return lib.array_equivalent_object(_ensure_object(left.ravel()), - _ensure_object(right.ravel())) - - for left_value, right_value in zip(left, right): - if left_value is tslib.NaT and right_value is not tslib.NaT: - return False - - elif isinstance(left_value, float) and np.isnan(left_value): - if (not isinstance(right_value, float) or - not np.isnan(right_value)): - return False - else: - if left_value != right_value: - return False - return True - - # NaNs can occur in float and complex arrays. - if is_float_dtype(left) or is_complex_dtype(left): - return ((left == right) | (np.isnan(left) & np.isnan(right))).all() - - # numpy will will not allow this type of datetimelike vs integer comparison - elif is_datetimelike_v_numeric(left, right): - return False - - # M8/m8 - elif needs_i8_conversion(left) and needs_i8_conversion(right): - if not is_dtype_equal(left.dtype, right.dtype): - return False - - left = left.view('i8') - right = right.view('i8') - - # NaNs cannot occur otherwise. - try: - return np.array_equal(left, right) - except AttributeError: - # see gh-13388 - # - # NumPy v1.7.1 has a bug in its array_equal - # function that prevents it from correctly - # comparing two arrays with complex dtypes. - # This bug is corrected in v1.8.0, so remove - # this try-except block as soon as we stop - # supporting NumPy versions < 1.8.0 - if not is_dtype_equal(left.dtype, right.dtype): - return False - - left = left.tolist() - right = right.tolist() - - return left == right - - -def _iterable_not_string(x): - return (isinstance(x, collections.Iterable) and - not isinstance(x, compat.string_types)) - - def flatten(l): """Flatten an arbitrarily nested sequence. @@ -398,510 +125,6 @@ def flatten(l): yield el -def _coerce_indexer_dtype(indexer, categories): - """ coerce the indexer input array to the smallest dtype possible """ - l = len(categories) - if l < _int8_max: - return _ensure_int8(indexer) - elif l < _int16_max: - return _ensure_int16(indexer) - elif l < _int32_max: - return _ensure_int32(indexer) - return _ensure_int64(indexer) - - -def _coerce_to_dtypes(result, dtypes): - """ given a dtypes and a result set, coerce the result elements to the - dtypes - """ - if len(result) != len(dtypes): - raise AssertionError("_coerce_to_dtypes requires equal len arrays") - - from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type - - def conv(r, dtype): - try: - if isnull(r): - pass - elif dtype == _NS_DTYPE: - r = lib.Timestamp(r) - elif dtype == _TD_DTYPE: - r = _coerce_scalar_to_timedelta_type(r) - elif dtype == np.bool_: - # messy. non 0/1 integers do not get converted. 
- if is_integer(r) and r not in [0, 1]: - return int(r) - r = bool(r) - elif dtype.kind == 'f': - r = float(r) - elif dtype.kind == 'i': - r = int(r) - except: - pass - - return r - - return [conv(r, dtype) for r, dtype in zip(result, dtypes)] - - -def _infer_fill_value(val): - """ - infer the fill value for the nan/NaT from the provided - scalar/ndarray/list-like if we are a NaT, return the correct dtyped - element to provide proper block construction - """ - - if not is_list_like(val): - val = [val] - val = np.array(val, copy=False) - if is_datetimelike(val): - return np.array('NaT', dtype=val.dtype) - elif is_object_dtype(val.dtype): - dtype = lib.infer_dtype(_ensure_object(val)) - if dtype in ['datetime', 'datetime64']: - return np.array('NaT', dtype=_NS_DTYPE) - elif dtype in ['timedelta', 'timedelta64']: - return np.array('NaT', dtype=_TD_DTYPE) - return np.nan - - -def _infer_dtype_from_scalar(val): - """ interpret the dtype from a scalar """ - - dtype = np.object_ - - # a 1-element ndarray - if isinstance(val, np.ndarray): - if val.ndim != 0: - raise ValueError( - "invalid ndarray passed to _infer_dtype_from_scalar") - - dtype = val.dtype - val = val.item() - - elif isinstance(val, compat.string_types): - - # If we create an empty array using a string to infer - # the dtype, NumPy will only allocate one character per entry - # so this is kind of bad. Alternately we could use np.repeat - # instead of np.empty (but then you still don't want things - # coming out as np.str_! - - dtype = np.object_ - - elif isinstance(val, (np.datetime64, - datetime)) and getattr(val, 'tzinfo', None) is None: - val = lib.Timestamp(val).value - dtype = np.dtype('M8[ns]') - - elif isinstance(val, (np.timedelta64, timedelta)): - val = lib.Timedelta(val).value - dtype = np.dtype('m8[ns]') - - elif is_bool(val): - dtype = np.bool_ - - elif is_integer(val): - if isinstance(val, np.integer): - dtype = type(val) - else: - dtype = np.int64 - - elif is_float(val): - if isinstance(val, np.floating): - dtype = type(val) - else: - dtype = np.float64 - - elif is_complex(val): - dtype = np.complex_ - - return dtype, val - - -def _is_na_compat(arr, fill_value=np.nan): - """ - Parameters - ---------- - arr: a numpy array - fill_value: fill value, default to np.nan - - Returns - ------- - True if we can fill using this fill_value - """ - dtype = arr.dtype - if isnull(fill_value): - return not (is_bool_dtype(dtype) or - is_integer_dtype(dtype)) - return True - - -def _maybe_fill(arr, fill_value=np.nan): - """ - if we have a compatiable fill_value and arr dtype, then fill - """ - if _is_na_compat(arr, fill_value): - arr.fill(fill_value) - return arr - - -def _maybe_promote(dtype, fill_value=np.nan): - - # if we passed an array here, determine the fill value by dtype - if isinstance(fill_value, np.ndarray): - if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): - fill_value = tslib.iNaT - else: - - # we need to change to object type as our - # fill_value is of object type - if fill_value.dtype == np.object_: - dtype = np.dtype(np.object_) - fill_value = np.nan - - # returns tuple of (dtype, fill_value) - if issubclass(dtype.type, (np.datetime64, np.timedelta64)): - # for now: refuse to upcast datetime64 - # (this is because datetime64 will not implicitly upconvert - # to object correctly as of numpy 1.6.1) - if isnull(fill_value): - fill_value = tslib.iNaT - else: - if issubclass(dtype.type, np.datetime64): - try: - fill_value = lib.Timestamp(fill_value).value - except: - # the proper thing to do here 
would probably be to upcast - # to object (but numpy 1.6.1 doesn't do this properly) - fill_value = tslib.iNaT - elif issubclass(dtype.type, np.timedelta64): - try: - fill_value = lib.Timedelta(fill_value).value - except: - # as for datetimes, cannot upcast to object - fill_value = tslib.iNaT - else: - fill_value = tslib.iNaT - elif is_datetimetz(dtype): - if isnull(fill_value): - fill_value = tslib.iNaT - elif is_float(fill_value): - if issubclass(dtype.type, np.bool_): - dtype = np.object_ - elif issubclass(dtype.type, np.integer): - dtype = np.float64 - elif is_bool(fill_value): - if not issubclass(dtype.type, np.bool_): - dtype = np.object_ - elif is_integer(fill_value): - if issubclass(dtype.type, np.bool_): - dtype = np.object_ - elif issubclass(dtype.type, np.integer): - # upcast to prevent overflow - arr = np.asarray(fill_value) - if arr != arr.astype(dtype): - dtype = arr.dtype - elif is_complex(fill_value): - if issubclass(dtype.type, np.bool_): - dtype = np.object_ - elif issubclass(dtype.type, (np.integer, np.floating)): - dtype = np.complex128 - elif fill_value is None: - if is_float_dtype(dtype) or is_complex_dtype(dtype): - fill_value = np.nan - elif is_integer_dtype(dtype): - dtype = np.float64 - fill_value = np.nan - elif is_datetime_or_timedelta_dtype(dtype): - fill_value = tslib.iNaT - else: - dtype = np.object_ - else: - dtype = np.object_ - - # in case we have a string that looked like a number - if is_categorical_dtype(dtype): - pass - elif is_datetimetz(dtype): - pass - elif issubclass(np.dtype(dtype).type, compat.string_types): - dtype = np.object_ - - return dtype, fill_value - - -def _maybe_upcast_putmask(result, mask, other): - """ - A safe version of putmask that potentially upcasts the result - - Parameters - ---------- - result : ndarray - The destination array. This will be mutated in-place if no upcasting is - necessary. - mask : boolean ndarray - other : ndarray or scalar - The source array or value - - Returns - ------- - result : ndarray - changed : boolean - Set to true if the result array was upcasted - """ - - if mask.any(): - # Two conversions for date-like dtypes that can't be done automatically - # in np.place: - # NaN -> NaT - # integer or integer array -> date-like array - if result.dtype in _DATELIKE_DTYPES: - if lib.isscalar(other): - if isnull(other): - other = result.dtype.type('nat') - elif is_integer(other): - other = np.array(other, dtype=result.dtype) - elif is_integer_dtype(other): - other = np.array(other, dtype=result.dtype) - - def changeit(): - - # try to directly set by expanding our array to full - # length of the boolean - try: - om = other[mask] - om_at = om.astype(result.dtype) - if (om == om_at).all(): - new_result = result.values.copy() - new_result[mask] = om_at - result[:] = new_result - return result, False - except: - pass - - # we are forced to change the dtype of the result as the input - # isn't compatible - r, _ = _maybe_upcast(result, fill_value=other, copy=True) - np.place(r, mask, other) - - return r, True - - # we want to decide whether place will work - # if we have nans in the False portion of our mask then we need to - # upcast (possibly), otherwise we DON't want to upcast (e.g. 
if we - # have values, say integers, in the success portion then it's ok to not - # upcast) - new_dtype, _ = _maybe_promote(result.dtype, other) - if new_dtype != result.dtype: - - # we have a scalar or len 0 ndarray - # and its nan and we are changing some values - if (lib.isscalar(other) or - (isinstance(other, np.ndarray) and other.ndim < 1)): - if isnull(other): - return changeit() - - # we have an ndarray and the masking has nans in it - else: - - if isnull(other[mask]).any(): - return changeit() - - try: - np.place(result, mask, other) - except: - return changeit() - - return result, False - - -def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): - """ provide explict type promotion and coercion - - Parameters - ---------- - values : the ndarray that we want to maybe upcast - fill_value : what we want to fill with - dtype : if None, then use the dtype of the values, else coerce to this type - copy : if True always make a copy even if no upcast is required - """ - - if is_extension_type(values): - if copy: - values = values.copy() - else: - if dtype is None: - dtype = values.dtype - new_dtype, fill_value = _maybe_promote(dtype, fill_value) - if new_dtype != values.dtype: - values = values.astype(new_dtype) - elif copy: - values = values.copy() - - return values, fill_value - - -def _possibly_cast_item(obj, item, dtype): - chunk = obj[item] - - if chunk.values.dtype != dtype: - if dtype in (np.object_, np.bool_): - obj[item] = chunk.astype(np.object_) - elif not issubclass(dtype, (np.integer, np.bool_)): # pragma: no cover - raise ValueError("Unexpected dtype encountered: %s" % dtype) - - -def _possibly_downcast_to_dtype(result, dtype): - """ try to cast to the specified dtype (e.g. convert back to bool/int - or could be an astype of float64->float32 - """ - - if lib.isscalar(result): - return result - - def trans(x): - return x - - if isinstance(dtype, compat.string_types): - if dtype == 'infer': - inferred_type = lib.infer_dtype(_ensure_object(result.ravel())) - if inferred_type == 'boolean': - dtype = 'bool' - elif inferred_type == 'integer': - dtype = 'int64' - elif inferred_type == 'datetime64': - dtype = 'datetime64[ns]' - elif inferred_type == 'timedelta64': - dtype = 'timedelta64[ns]' - - # try to upcast here - elif inferred_type == 'floating': - dtype = 'int64' - if issubclass(result.dtype.type, np.number): - - def trans(x): # noqa - return x.round() - else: - dtype = 'object' - - if isinstance(dtype, compat.string_types): - dtype = np.dtype(dtype) - - try: - - # don't allow upcasts here (except if empty) - if dtype.kind == result.dtype.kind: - if (result.dtype.itemsize <= dtype.itemsize and - np.prod(result.shape)): - return result - - if issubclass(dtype.type, np.floating): - return result.astype(dtype) - elif dtype == np.bool_ or issubclass(dtype.type, np.integer): - - # if we don't have any elements, just astype it - if not np.prod(result.shape): - return trans(result).astype(dtype) - - # do a test on the first element, if it fails then we are done - r = result.ravel() - arr = np.array([r[0]]) - - # if we have any nulls, then we are done - if isnull(arr).any() or not np.allclose(arr, - trans(arr).astype(dtype)): - return result - - # a comparable, e.g. 
a Decimal may slip in here - elif not isinstance(r[0], (np.integer, np.floating, np.bool, int, - float, bool)): - return result - - if (issubclass(result.dtype.type, (np.object_, np.number)) and - notnull(result).all()): - new_result = trans(result).astype(dtype) - try: - if np.allclose(new_result, result): - return new_result - except: - - # comparison of an object dtype with a number type could - # hit here - if (new_result == result).all(): - return new_result - - # a datetimelike - elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i']: - try: - result = result.astype(dtype) - except: - if dtype.tz: - # convert to datetime and change timezone - result = pd.to_datetime(result).tz_localize(dtype.tz) - - except: - pass - - return result - - -def _maybe_convert_string_to_object(values): - """ - - Convert string-like and string-like array to convert object dtype. - This is to avoid numpy to handle the array as str dtype. - """ - if isinstance(values, string_types): - values = np.array([values], dtype=object) - elif (isinstance(values, np.ndarray) and - issubclass(values.dtype.type, (np.string_, np.unicode_))): - values = values.astype(object) - return values - - -def _maybe_convert_scalar(values): - """ - Convert a python scalar to the appropriate numpy dtype if possible - This avoids numpy directly converting according to platform preferences - """ - if lib.isscalar(values): - dtype, values = _infer_dtype_from_scalar(values) - try: - values = dtype(values) - except TypeError: - pass - return values - - -def _lcd_dtypes(a_dtype, b_dtype): - """ return the lcd dtype to hold these types """ - - if is_datetime64_dtype(a_dtype) or is_datetime64_dtype(b_dtype): - return _NS_DTYPE - elif is_timedelta64_dtype(a_dtype) or is_timedelta64_dtype(b_dtype): - return _TD_DTYPE - elif is_complex_dtype(a_dtype): - if is_complex_dtype(b_dtype): - return a_dtype - return np.float64 - elif is_integer_dtype(a_dtype): - if is_integer_dtype(b_dtype): - if a_dtype.itemsize == b_dtype.itemsize: - return a_dtype - return np.int64 - return np.float64 - elif is_float_dtype(a_dtype): - if is_float_dtype(b_dtype): - if a_dtype.itemsize == b_dtype.itemsize: - return a_dtype - else: - return np.float64 - elif is_integer(b_dtype): - return np.float64 - return np.object - - def _consensus_name_attr(objs): name = objs[0].name for obj in objs[1:]: @@ -909,66 +132,20 @@ def _consensus_name_attr(objs): return None return name -# ---------------------------------------------------------------------- -# Lots of little utilities - - -def _validate_date_like_dtype(dtype): - try: - typ = np.datetime_data(dtype)[0] - except ValueError as e: - raise TypeError('%s' % e) - if typ != 'generic' and typ != 'ns': - raise ValueError('%r is too specific of a frequency, try passing %r' % - (dtype.name, dtype.type.__name__)) - - -def _invalidate_string_dtypes(dtype_set): - """Change string like dtypes to object for - ``DataFrame.select_dtypes()``. - """ - non_string_dtypes = dtype_set - _string_dtypes - if non_string_dtypes != dtype_set: - raise TypeError("string dtypes are not allowed, use 'object' instead") - - -def _get_dtype_from_object(dtype): - """Get a numpy dtype.type-style object. This handles the datetime64[ns] - and datetime64[ns, TZ] compat - - Notes - ----- - If nothing can be found, returns ``object``. 
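
``_get_dtype_from_object`` and ``_invalidate_string_dtypes`` back ``DataFrame.select_dtypes``; roughly::

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y'], 'c': [1.0, 2.0]})
    df.select_dtypes(include=[np.number])   # columns 'a' and 'c'
    df.select_dtypes(include=['object'])    # column 'b'
    # select_dtypes(include=[str]) raises TypeError:
    # "string dtypes are not allowed, use 'object' instead"
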
- """ - # type object from a dtype - if isinstance(dtype, type) and issubclass(dtype, np.generic): - return dtype - elif is_categorical(dtype): - return gt.CategoricalDtype().type - elif is_datetimetz(dtype): - return gt.DatetimeTZDtype(dtype).type - elif isinstance(dtype, np.dtype): # dtype object - try: - _validate_date_like_dtype(dtype) - except TypeError: - # should still pass if we don't have a datelike - pass - return dtype.type - elif isinstance(dtype, compat.string_types): - if dtype == 'datetime' or dtype == 'timedelta': - dtype += '64' - - try: - return _get_dtype_from_object(getattr(np, dtype)) - except (AttributeError, TypeError): - # handles cases like _get_dtype(int) - # i.e., python objects that are valid dtypes (unlike user-defined - # types, in general) - # TypeError handles the float16 typecode of 'e' - # further handle internal types - pass - return _get_dtype_from_object(np.dtype(dtype)) +def _maybe_match_name(a, b): + a_has = hasattr(a, 'name') + b_has = hasattr(b, 'name') + if a_has and b_has: + if a.name == b.name: + return a.name + else: + return None + elif a_has: + return a.name + elif b_has: + return b.name + return None def _get_info_slice(obj, indexer): @@ -1005,225 +182,8 @@ def _maybe_box_datetimelike(value): _values_from_object = lib.values_from_object -def _possibly_castable(arr): - # return False to force a non-fastpath - - # check datetime64[ns]/timedelta64[ns] are valid - # otherwise try to coerce - kind = arr.dtype.kind - if kind == 'M' or kind == 'm': - return arr.dtype in _DATELIKE_DTYPES - - return arr.dtype.name not in _POSSIBLY_CAST_DTYPES - - -def _possibly_convert_platform(values): - """ try to do platform conversion, allow ndarray or list here """ - - if isinstance(values, (list, tuple)): - values = lib.list_to_object_array(values) - if getattr(values, 'dtype', None) == np.object_: - if hasattr(values, '_values'): - values = values._values - values = lib.maybe_convert_objects(values) - - return values - - -def _possibly_cast_to_datetime(value, dtype, errors='raise'): - """ try to cast the array/value to a datetimelike dtype, converting float - nan to iNaT - """ - from pandas.tseries.timedeltas import to_timedelta - from pandas.tseries.tools import to_datetime - - if dtype is not None: - if isinstance(dtype, compat.string_types): - dtype = np.dtype(dtype) - - is_datetime64 = is_datetime64_dtype(dtype) - is_datetime64tz = is_datetime64tz_dtype(dtype) - is_timedelta64 = is_timedelta64_dtype(dtype) - - if is_datetime64 or is_datetime64tz or is_timedelta64: - - # force the dtype if needed - if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.name == 'datetime64[ns]': - dtype = _NS_DTYPE - else: - raise TypeError("cannot convert datetimelike to " - "dtype [%s]" % dtype) - elif is_datetime64tz: - - # our NaT doesn't support tz's - # this will coerce to DatetimeIndex with - # a matching dtype below - if lib.isscalar(value) and isnull(value): - value = [value] - - elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.name == 'timedelta64[ns]': - dtype = _TD_DTYPE - else: - raise TypeError("cannot convert timedeltalike to " - "dtype [%s]" % dtype) - - if lib.isscalar(value): - if value == tslib.iNaT or isnull(value): - value = tslib.iNaT - else: - value = np.array(value, copy=False) - - # have a scalar array-like (e.g. 
NaT) - if value.ndim == 0: - value = tslib.iNaT - - # we have an array of datetime or timedeltas & nulls - elif np.prod(value.shape) or not is_dtype_equal(value.dtype, - dtype): - try: - if is_datetime64: - value = to_datetime(value, errors=errors)._values - elif is_datetime64tz: - # input has to be UTC at this point, so just - # localize - value = to_datetime( - value, - errors=errors).tz_localize(dtype.tz) - elif is_timedelta64: - value = to_timedelta(value, errors=errors)._values - except (AttributeError, ValueError, TypeError): - pass - - # coerce datetimelike to object - elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype): - if is_object_dtype(dtype): - ints = np.asarray(value).view('i8') - return tslib.ints_to_pydatetime(ints) - - # we have a non-castable dtype that was passed - raise TypeError('Cannot cast datetime64 to %s' % dtype) - - else: - - is_array = isinstance(value, np.ndarray) - - # catch a datetime/timedelta that is not of ns variety - # and no coercion specified - if is_array and value.dtype.kind in ['M', 'm']: - dtype = value.dtype - - if dtype.kind == 'M' and dtype != _NS_DTYPE: - value = value.astype(_NS_DTYPE) - - elif dtype.kind == 'm' and dtype != _TD_DTYPE: - value = to_timedelta(value) - - # only do this if we have an array and the dtype of the array is not - # setup already we are not an integer/object, so don't bother with this - # conversion - elif not (is_array and not (issubclass(value.dtype.type, np.integer) or - value.dtype == np.object_)): - value = _possibly_infer_to_datetimelike(value) - - return value - - -def _possibly_infer_to_datetimelike(value, convert_dates=False): - """ - we might have an array (or single object) that is datetime like, - and no dtype is passed don't change the value unless we find a - datetime/timedelta set - - this is pretty strict in that a datetime/timedelta is REQUIRED - in addition to possible nulls/string likes - - ONLY strings are NOT datetimelike - - Parameters - ---------- - value : np.array / Series / Index / list-like - convert_dates : boolean, default False - if True try really hard to convert dates (such as datetime.date), other - leave inferred dtype 'date' alone - - """ - - if isinstance(value, (gt.ABCDatetimeIndex, gt.ABCPeriodIndex)): - return value - elif isinstance(value, gt.ABCSeries): - if isinstance(value._values, gt.ABCDatetimeIndex): - return value._values - - v = value - if not is_list_like(v): - v = [v] - v = np.array(v, copy=False) - shape = v.shape - if not v.ndim == 1: - v = v.ravel() - - if len(v): - - def _try_datetime(v): - # safe coerce to datetime64 - try: - v = tslib.array_to_datetime(v, errors='raise') - except ValueError: - - # we might have a sequence of the same-datetimes with tz's - # if so coerce to a DatetimeIndex; if they are not the same, - # then these stay as object dtype - try: - from pandas import to_datetime - return to_datetime(v) - except: - pass - - except: - pass - - return v.reshape(shape) - - def _try_timedelta(v): - # safe coerce to timedelta64 - - # will try first with a string & object conversion - from pandas.tseries.timedeltas import to_timedelta - try: - return to_timedelta(v)._values.reshape(shape) - except: - return v - - # do a quick inference for perf - sample = v[:min(3, len(v))] - inferred_type = lib.infer_dtype(sample) - - if (inferred_type in ['datetime', 'datetime64'] or - (convert_dates and inferred_type in ['date'])): - value = _try_datetime(v) - elif inferred_type in ['timedelta', 'timedelta64']: - value = _try_timedelta(v) - - # It's possible 
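
Per the docstring above, inference requires an actual datetime/timedelta instance and never promotes plain strings; for example::

    import pandas as pd

    pd.Series([pd.Timestamp('2016-01-01'), None]).dtype   # datetime64[ns]
    pd.Series([pd.Timedelta('1s'), None]).dtype           # timedelta64[ns]
    pd.Series(['2016-01-01', None]).dtype                 # object: strings alone stay put
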
to have nulls intermixed within the datetime or - # timedelta. These will in general have an inferred_type of 'mixed', - # so have to try both datetime and timedelta. - - # try timedelta first to avoid spurious datetime conversions - # e.g. '00:00:01' is a timedelta but technically is also a datetime - elif inferred_type in ['mixed']: - - if lib.is_possible_datetimelike_array(_ensure_object(v)): - value = _try_timedelta(v) - if lib.infer_dtype(value) in ['mixed']: - value = _try_datetime(v) - - return value - - def is_bool_indexer(key): - if isinstance(key, (gt.ABCSeries, np.ndarray)): + if isinstance(key, (ABCSeries, np.ndarray)): if key.dtype == np.object_: key = np.asarray(_values_from_object(key)) @@ -1250,12 +210,6 @@ def _default_index(n): return RangeIndex(0, n, name=None) -def ensure_float(arr): - if issubclass(arr.dtype.type, (np.integer, np.bool_)): - arr = arr.astype(float) - return arr - - def _mut_exclusive(**kwargs): item1, item2 = kwargs.items() label1, val1 = item1 @@ -1287,6 +241,10 @@ def _all_not_none(*args): return True +def _count_not_none(*args): + return sum(x is not None for x in args) + + def _try_sort(iterable): listed = list(iterable) try: @@ -1295,10 +253,6 @@ def _try_sort(iterable): return listed -def _count_not_none(*args): - return sum(x is not None for x in args) - - def iterpairs(seq): """ Parameters @@ -1451,349 +405,6 @@ def _maybe_make_list(obj): return [obj] return obj -# TYPE TESTING - -is_bool = lib.is_bool - -is_integer = lib.is_integer - -is_float = lib.is_float - -is_complex = lib.is_complex - - -def is_string_like(obj): - return isinstance(obj, (compat.text_type, compat.string_types)) - - -def is_iterator(obj): - # python 3 generators have __next__ instead of next - return hasattr(obj, 'next') or hasattr(obj, '__next__') - - -def is_number(obj): - return isinstance(obj, (numbers.Number, np.number)) - - -def is_period_arraylike(arr): - """ return if we are period arraylike / PeriodIndex """ - if isinstance(arr, pd.PeriodIndex): - return True - elif isinstance(arr, (np.ndarray, gt.ABCSeries)): - return arr.dtype == object and lib.infer_dtype(arr) == 'period' - return getattr(arr, 'inferred_type', None) == 'period' - - -def is_datetime_arraylike(arr): - """ return if we are datetime arraylike / DatetimeIndex """ - if isinstance(arr, gt.ABCDatetimeIndex): - return True - elif isinstance(arr, (np.ndarray, gt.ABCSeries)): - return arr.dtype == object and lib.infer_dtype(arr) == 'datetime' - return getattr(arr, 'inferred_type', None) == 'datetime' - - -def is_datetimelike(arr): - return (arr.dtype in _DATELIKE_DTYPES or - isinstance(arr, gt.ABCPeriodIndex) or - is_datetimetz(arr)) - - -def _coerce_to_dtype(dtype): - """ coerce a string / np.dtype to a dtype """ - if is_categorical_dtype(dtype): - dtype = gt.CategoricalDtype() - elif is_datetime64tz_dtype(dtype): - dtype = gt.DatetimeTZDtype(dtype) - else: - dtype = np.dtype(dtype) - return dtype - - -def _get_dtype(arr_or_dtype): - if isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, type): - return np.dtype(arr_or_dtype) - elif isinstance(arr_or_dtype, gt.CategoricalDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, gt.DatetimeTZDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, compat.string_types): - if is_categorical_dtype(arr_or_dtype): - return gt.CategoricalDtype.construct_from_string(arr_or_dtype) - elif is_datetime64tz_dtype(arr_or_dtype): - return gt.DatetimeTZDtype.construct_from_string(arr_or_dtype) - - if hasattr(arr_or_dtype, 
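
``_coerce_to_dtype`` accepts dtype strings as well as numpy dtypes and preserves the pandas extension dtypes. A sketch, using the import path this refactor creates (later releases expose the public subset under ``pandas.api.types``)::

    from pandas.types.common import _coerce_to_dtype

    _coerce_to_dtype('int64')       # numpy dtype('int64')
    _coerce_to_dtype('category')    # CategoricalDtype, not a numpy dtype
    # tz-aware strings such as 'datetime64[ns, UTC]' map to DatetimeTZDtype
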
'dtype'): - arr_or_dtype = arr_or_dtype.dtype - return np.dtype(arr_or_dtype) - - -def _get_dtype_type(arr_or_dtype): - if isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype.type - elif isinstance(arr_or_dtype, type): - return np.dtype(arr_or_dtype).type - elif isinstance(arr_or_dtype, gt.CategoricalDtype): - return gt.CategoricalDtypeType - elif isinstance(arr_or_dtype, gt.DatetimeTZDtype): - return gt.DatetimeTZDtypeType - elif isinstance(arr_or_dtype, compat.string_types): - if is_categorical_dtype(arr_or_dtype): - return gt.CategoricalDtypeType - elif is_datetime64tz_dtype(arr_or_dtype): - return gt.DatetimeTZDtypeType - return _get_dtype_type(np.dtype(arr_or_dtype)) - try: - return arr_or_dtype.dtype.type - except AttributeError: - return type(None) - - -def is_dtype_equal(source, target): - """ return a boolean if the dtypes are equal """ - try: - source = _get_dtype(source) - target = _get_dtype(target) - return source == target - except (TypeError, AttributeError): - - # invalid comparison - # object == category will hit this - return False - - -def is_any_int_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.integer) - - -def is_integer_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.integer) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_int64_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.int64) - - -def is_int_or_datetime_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.integer) or - issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_datetime64_dtype(arr_or_dtype): - try: - tipo = _get_dtype_type(arr_or_dtype) - except TypeError: - return False - return issubclass(tipo, np.datetime64) - - -def is_datetime64tz_dtype(arr_or_dtype): - return gt.DatetimeTZDtype.is_dtype(arr_or_dtype) - - -def is_datetime64_any_dtype(arr_or_dtype): - return (is_datetime64_dtype(arr_or_dtype) or - is_datetime64tz_dtype(arr_or_dtype)) - - -def is_datetime64_ns_dtype(arr_or_dtype): - try: - tipo = _get_dtype(arr_or_dtype) - except TypeError: - return False - return tipo == _NS_DTYPE - - -def is_timedelta64_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.timedelta64) - - -def is_timedelta64_ns_dtype(arr_or_dtype): - tipo = _get_dtype(arr_or_dtype) - return tipo == _TD_DTYPE - - -def is_datetime_or_timedelta_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, (np.datetime64, np.timedelta64)) - - -def is_numeric_v_string_like(a, b): - """ - numpy doesn't like to compare numeric arrays vs scalar string-likes - - return a boolean result if this is the case for a,b or b,a - - """ - is_a_array = isinstance(a, np.ndarray) - is_b_array = isinstance(b, np.ndarray) - - is_a_numeric_array = is_a_array and is_numeric_dtype(a) - is_b_numeric_array = is_b_array and is_numeric_dtype(b) - is_a_string_array = is_a_array and is_string_like_dtype(a) - is_b_string_array = is_b_array and is_string_like_dtype(b) - - is_a_scalar_string_like = not is_a_array and is_string_like(a) - is_b_scalar_string_like = not is_b_array and is_string_like(b) - - return ((is_a_numeric_array and is_b_scalar_string_like) or - (is_b_numeric_array and is_a_scalar_string_like) or - (is_a_numeric_array and is_b_string_array) or - (is_b_numeric_array and is_a_string_array)) - - -def is_datetimelike_v_numeric(a, b): - # return if we have an i8 convertible and numeric 
comparison - if not hasattr(a, 'dtype'): - a = np.asarray(a) - if not hasattr(b, 'dtype'): - b = np.asarray(b) - - def is_numeric(x): - return is_integer_dtype(x) or is_float_dtype(x) - - is_datetimelike = needs_i8_conversion - return ((is_datetimelike(a) and is_numeric(b)) or - (is_datetimelike(b) and is_numeric(a))) - - -def is_datetimelike_v_object(a, b): - # return if we have an i8 convertible and object comparsion - if not hasattr(a, 'dtype'): - a = np.asarray(a) - if not hasattr(b, 'dtype'): - b = np.asarray(b) - - def f(x): - return is_object_dtype(x) - - def is_object(x): - return is_integer_dtype(x) or is_float_dtype(x) - - is_datetimelike = needs_i8_conversion - return ((is_datetimelike(a) and is_object(b)) or - (is_datetimelike(b) and is_object(a))) - - -def needs_i8_conversion(arr_or_dtype): - return (is_datetime_or_timedelta_dtype(arr_or_dtype) or - is_datetime64tz_dtype(arr_or_dtype)) - - -def is_numeric_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, (np.number, np.bool_)) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_string_dtype(arr_or_dtype): - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('O', 'S', 'U') - - -def is_string_like_dtype(arr_or_dtype): - # exclude object as its a mixed dtype - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('S', 'U') - - -def is_float_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.floating) - - -def is_floating_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return isinstance(tipo, np.floating) - - -def is_bool_dtype(arr_or_dtype): - try: - tipo = _get_dtype_type(arr_or_dtype) - except ValueError: - # this isn't even a dtype - return False - return issubclass(tipo, np.bool_) - - -def is_sparse(array): - """ return if we are a sparse array """ - return isinstance(array, (gt.ABCSparseArray, gt.ABCSparseSeries)) - - -def is_datetimetz(array): - """ return if we are a datetime with tz array """ - return ((isinstance(array, gt.ABCDatetimeIndex) and - getattr(array, 'tz', None) is not None) or - is_datetime64tz_dtype(array)) - - -def is_extension_type(value): - """ - if we are a klass that is preserved by the internals - these are internal klasses that we represent (and don't use a np.array) - """ - if is_categorical(value): - return True - elif is_sparse(value): - return True - elif is_datetimetz(value): - return True - return False - - -def is_categorical(array): - """ return if we are a categorical possibility """ - return isinstance(array, gt.ABCCategorical) or is_categorical_dtype(array) - - -def is_categorical_dtype(arr_or_dtype): - return gt.CategoricalDtype.is_dtype(arr_or_dtype) - - -def is_complex_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.complexfloating) - - -def is_object_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.object_) - - -def is_re(obj): - return isinstance(obj, re._pattern_type) - - -def is_re_compilable(obj): - try: - re.compile(obj) - except TypeError: - return False - else: - return True - - -def is_list_like(arg): - return (hasattr(arg, '__iter__') and - not isinstance(arg, compat.string_and_binary_types)) - - -def is_dict_like(arg): - return hasattr(arg, '__getitem__') and hasattr(arg, 'keys') - - -def is_named_tuple(arg): - return isinstance(arg, tuple) and hasattr(arg, '_fields') - def is_null_slice(obj): """ we have a null slice """ @@ -1807,47 +418,6 @@ def is_full_slice(obj, l): obj.step 
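
The small predicates above have simple contracts; for instance (import path per this refactor)::

    from pandas.types.common import is_dict_like, is_list_like

    is_list_like([1, 2])     # True
    is_list_like('abc')      # False: strings iterate but are excluded
    is_dict_like({'a': 1})   # True: needs __getitem__ and keys()
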
is None) -def is_hashable(arg): - """Return True if hash(arg) will succeed, False otherwise. - - Some types will pass a test against collections.Hashable but fail when they - are actually hashed with hash(). - - Distinguish between these and other types by trying the call to hash() and - seeing if they raise TypeError. - - Examples - -------- - >>> a = ([],) - >>> isinstance(a, collections.Hashable) - True - >>> is_hashable(a) - False - """ - # unfortunately, we can't use isinstance(arg, collections.Hashable), which - # can be faster than calling hash, because numpy scalars on Python 3 fail - # this test - - # reconsider this decision once this numpy bug is fixed: - # https://github.com/numpy/numpy/issues/5562 - - try: - hash(arg) - except TypeError: - return False - else: - return True - - -def is_sequence(x): - try: - iter(x) - len(x) # it has a length - return not isinstance(x, compat.string_and_binary_types) - except (TypeError, AttributeError): - return False - - def _get_callable_name(obj): # typical case has name if hasattr(obj, '__name__'): @@ -1875,74 +445,6 @@ def _apply_if_callable(maybe_callable, obj, **kwargs): return maybe_callable -_string_dtypes = frozenset(map(_get_dtype_from_object, (compat.binary_type, - compat.text_type))) - -_ensure_float64 = algos.ensure_float64 -_ensure_float32 = algos.ensure_float32 -_ensure_int64 = algos.ensure_int64 -_ensure_int32 = algos.ensure_int32 -_ensure_int16 = algos.ensure_int16 -_ensure_int8 = algos.ensure_int8 -_ensure_platform_int = algos.ensure_platform_int -_ensure_object = algos.ensure_object - - -def _astype_nansafe(arr, dtype, copy=True): - """ return a view if copy is False, but - need to be very careful as the result shape could change! """ - if not isinstance(dtype, np.dtype): - dtype = _coerce_to_dtype(dtype) - - if issubclass(dtype.type, compat.text_type): - # in Py3 that's str, in Py2 that's unicode - return lib.astype_unicode(arr.ravel()).reshape(arr.shape) - elif issubclass(dtype.type, compat.string_types): - return lib.astype_str(arr.ravel()).reshape(arr.shape) - elif is_datetime64_dtype(arr): - if dtype == object: - return tslib.ints_to_pydatetime(arr.view(np.int64)) - elif dtype == np.int64: - return arr.view(dtype) - elif dtype != _NS_DTYPE: - raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % - (arr.dtype, dtype)) - return arr.astype(_NS_DTYPE) - elif is_timedelta64_dtype(arr): - if dtype == np.int64: - return arr.view(dtype) - elif dtype == object: - return tslib.ints_to_pytimedelta(arr.view(np.int64)) - - # in py3, timedelta64[ns] are int64 - elif ((compat.PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or - (not compat.PY3 and dtype != _TD_DTYPE)): - - # allow frequency conversions - if dtype.kind == 'm': - mask = isnull(arr) - result = arr.astype(dtype).astype(np.float64) - result[mask] = np.nan - return result - - raise TypeError("cannot astype a timedelta from [%s] to [%s]" % - (arr.dtype, dtype)) - - return arr.astype(_TD_DTYPE) - elif (np.issubdtype(arr.dtype, np.floating) and - np.issubdtype(dtype, np.integer)): - - if np.isnan(arr).any(): - raise ValueError('Cannot convert NA to integer') - elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): - # work around NumPy brokenness, #1987 - return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) - - if copy: - return arr.astype(dtype) - return arr.view(dtype) - - def _all_none(*args): for arg in args: if arg is not None: @@ -1988,6 +490,9 @@ class Sentinel(object): return Sentinel() +# 
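
``_astype_nansafe`` guards conversions that a plain ``ndarray.astype`` would silently get wrong; on this era of pandas, for example::

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan])
    # s.astype(np.int64) raises ValueError('Cannot convert NA to integer')

    d = pd.to_datetime(pd.Series(['2016-01-01']))
    d.astype(np.int64)   # the underlying nanosecond i8 view
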
---------------------------------------------------------------------- +# Detect our environment + def in_interactive_session(): """ check if we're running in an interactive shell @@ -2055,21 +560,6 @@ def in_ipython_frontend(): return False -def _maybe_match_name(a, b): - a_has = hasattr(a, 'name') - b_has = hasattr(b, 'name') - if a_has and b_has: - if a.name == b.name: - return a.name - else: - return None - elif a_has: - return a.name - elif b_has: - return b.name - return None - - def _random_state(state=None): """ Helper function for processing random_state arguments. diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 3ca2c6cd014bc..5cbc968f06fa7 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -366,7 +366,7 @@ def mpl_style_cb(key): def use_inf_as_null_cb(key): - from pandas.core.common import _use_inf_as_null + from pandas.types.missing import _use_inf_as_null _use_inf_as_null(key) with cf.config_prefix('mode'): diff --git a/pandas/core/convert.py b/pandas/core/convert.py deleted file mode 100644 index 7f4fe73c688f8..0000000000000 --- a/pandas/core/convert.py +++ /dev/null @@ -1,127 +0,0 @@ -""" -Functions for converting object to other types -""" - -import numpy as np - -import pandas as pd -from pandas.core.common import (_possibly_cast_to_datetime, is_object_dtype, - isnull) -import pandas.lib as lib - - -# TODO: Remove in 0.18 or 2017, which ever is sooner -def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, - convert_timedeltas=True, copy=True): - """ if we have an object dtype, try to coerce dates and/or numbers """ - - # if we have passed in a list or scalar - if isinstance(values, (list, tuple)): - values = np.array(values, dtype=np.object_) - if not hasattr(values, 'dtype'): - values = np.array([values], dtype=np.object_) - - # convert dates - if convert_dates and values.dtype == np.object_: - - # we take an aggressive stance and convert to datetime64[ns] - if convert_dates == 'coerce': - new_values = _possibly_cast_to_datetime(values, 'M8[ns]', - errors='coerce') - - # if we are all nans then leave me alone - if not isnull(new_values).all(): - values = new_values - - else: - values = lib.maybe_convert_objects(values, - convert_datetime=convert_dates) - - # convert timedeltas - if convert_timedeltas and values.dtype == np.object_: - - if convert_timedeltas == 'coerce': - from pandas.tseries.timedeltas import to_timedelta - new_values = to_timedelta(values, coerce=True) - - # if we are all nans then leave me alone - if not isnull(new_values).all(): - values = new_values - - else: - values = lib.maybe_convert_objects( - values, convert_timedelta=convert_timedeltas) - - # convert to numeric - if values.dtype == np.object_: - if convert_numeric: - try: - new_values = lib.maybe_convert_numeric(values, set(), - coerce_numeric=True) - - # if we are all nans then leave me alone - if not isnull(new_values).all(): - values = new_values - - except: - pass - else: - # soft-conversion - values = lib.maybe_convert_objects(values) - - values = values.copy() if copy else values - - return values - - -def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, - coerce=False, copy=True): - """ if we have an object dtype, try to coerce dates and/or numbers """ - - conversion_count = sum((datetime, numeric, timedelta)) - if conversion_count == 0: - raise ValueError('At least one of datetime, numeric or timedelta must ' - 'be True.') - elif conversion_count > 1 and coerce: - raise 
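
The ``coerce`` paths being deleted here correspond to what users reach through the public converters that ``_soft_convert_objects`` delegates to::

    import pandas as pd

    pd.to_numeric(pd.Series(['1', 'x']), errors='coerce')           # [1.0, NaN]
    pd.to_datetime(pd.Series(['2016-01-01', 'x']), errors='coerce') # [Timestamp, NaT]
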
ValueError("Only one of 'datetime', 'numeric' or " - "'timedelta' can be True when when coerce=True.") - - if isinstance(values, (list, tuple)): - # List or scalar - values = np.array(values, dtype=np.object_) - elif not hasattr(values, 'dtype'): - values = np.array([values], dtype=np.object_) - elif not is_object_dtype(values.dtype): - # If not object, do not attempt conversion - values = values.copy() if copy else values - return values - - # If 1 flag is coerce, ensure 2 others are False - if coerce: - # Immediate return if coerce - if datetime: - return pd.to_datetime(values, errors='coerce', box=False) - elif timedelta: - return pd.to_timedelta(values, errors='coerce', box=False) - elif numeric: - return pd.to_numeric(values, errors='coerce') - - # Soft conversions - if datetime: - values = lib.maybe_convert_objects(values, convert_datetime=datetime) - - if timedelta and is_object_dtype(values.dtype): - # Object check to ensure only run if previous did not convert - values = lib.maybe_convert_objects(values, convert_timedelta=timedelta) - - if numeric and is_object_dtype(values.dtype): - try: - converted = lib.maybe_convert_numeric(values, set(), - coerce_numeric=True) - # If all NaNs, then do not-alter - values = converted if not isnull(converted).all() else values - values = values.copy() if copy else values - except: - pass - - return values diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e01fc6dca6be3..334526b424be5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -23,12 +23,43 @@ import numpy as np import numpy.ma as ma -from pandas.core.common import ( - isnull, notnull, PandasError, _try_sort, _default_index, _maybe_upcast, - is_sequence, _infer_dtype_from_scalar, _values_from_object, is_list_like, - _maybe_box_datetimelike, is_categorical_dtype, is_object_dtype, - is_extension_type, is_datetimetz, _possibly_infer_to_datetimelike, - _dict_compat) +from pandas.types.cast import (_maybe_upcast, + _infer_dtype_from_scalar, + _possibly_cast_to_datetime, + _possibly_infer_to_datetimelike, + _possibly_convert_platform, + _possibly_downcast_to_dtype, + _invalidate_string_dtypes, + _coerce_to_dtypes, + _maybe_upcast_putmask) +from pandas.types.common import (is_categorical_dtype, + is_object_dtype, + is_extension_type, + is_datetimetz, + is_datetime64_dtype, + is_bool_dtype, + is_integer_dtype, + is_float_dtype, + is_integer, + is_scalar, + needs_i8_conversion, + _get_dtype_from_object, + _lcd_dtypes, + _ensure_float, + _ensure_float64, + _ensure_int64, + _ensure_platform_int, + is_list_like, + is_iterator, + is_sequence, + is_named_tuple) +from pandas.types.missing import isnull, notnull + +from pandas.core.common import (PandasError, _try_sort, + _default_index, + _values_from_object, + _maybe_box_datetimelike, + _dict_compat) from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable, @@ -268,7 +299,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, data = list(data) if len(data) > 0: if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1: - if com.is_named_tuple(data[0]) and columns is None: + if is_named_tuple(data[0]) and columns is None: columns = data[0]._fields arrays, columns = _to_arrays(data, columns, dtype=dtype) columns = _ensure_index(columns) @@ -940,7 +971,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, if columns is not None: columns = 
_ensure_index(columns) - if com.is_iterator(data): + if is_iterator(data): if nrows == 0: return cls() @@ -1051,7 +1082,7 @@ def to_records(self, index=True, convert_datetime64=True): y : recarray """ if index: - if com.is_datetime64_dtype(self.index) and convert_datetime64: + if is_datetime64_dtype(self.index) and convert_datetime64: ix_vals = [self.index.to_pydatetime()] else: if isinstance(self.index, MultiIndex): @@ -1920,7 +1951,7 @@ def _ixs(self, i, axis=0): copy = True else: new_values = self._data.fast_xs(i) - if lib.isscalar(new_values): + if is_scalar(new_values): return new_values # if we are a copy, mark as such @@ -2072,7 +2103,7 @@ def _getitem_multilevel(self, key): return self._get_item_cache(key) def _getitem_frame(self, key): - if key.values.size and not com.is_bool_dtype(key.values): + if key.values.size and not is_bool_dtype(key.values): raise ValueError('Must pass DataFrame with boolean values only') return self.where(key) @@ -2289,7 +2320,7 @@ def select_dtypes(self, include=None, exclude=None): 5 False """ include, exclude = include or (), exclude or () - if not (com.is_list_like(include) and com.is_list_like(exclude)): + if not (is_list_like(include) and is_list_like(exclude)): raise TypeError('include and exclude must both be non-string' ' sequences') selection = tuple(map(frozenset, (include, exclude))) @@ -2300,9 +2331,9 @@ def select_dtypes(self, include=None, exclude=None): # convert the myriad valid dtypes object to a single representation include, exclude = map( - lambda x: frozenset(map(com._get_dtype_from_object, x)), selection) + lambda x: frozenset(map(_get_dtype_from_object, x)), selection) for dtypes in (include, exclude): - com._invalidate_string_dtypes(dtypes) + _invalidate_string_dtypes(dtypes) # can't both include AND exclude! if not include.isdisjoint(exclude): @@ -2392,7 +2423,7 @@ def _setitem_array(self, key, value): def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. 
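
``is_iterator`` lets ``from_records`` consume generators as well as materialized sequences::

    import pandas as pd

    gen = ((i, i ** 2) for i in range(3))
    pd.DataFrame.from_records(gen, columns=['i', 'i_sq'])
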
# df[df > df2] = 0 - if key.values.size and not com.is_bool_dtype(key.values): + if key.values.size and not is_bool_dtype(key.values): raise TypeError('Must pass DataFrame with boolean values only') self._check_inplace_setting(value) @@ -2586,7 +2617,7 @@ def reindexer(value): value = _sanitize_index(value, self.index, copy=False) if not isinstance(value, (np.ndarray, Index)): if isinstance(value, list) and len(value) > 0: - value = com._possibly_convert_platform(value) + value = _possibly_convert_platform(value) else: value = com._asarray_tuplesafe(value) elif value.ndim == 2: @@ -2602,7 +2633,7 @@ def reindexer(value): # upcast the scalar dtype, value = _infer_dtype_from_scalar(value) value = np.repeat(value, len(self.index)).astype(dtype) - value = com._possibly_cast_to_datetime(value, dtype) + value = _possibly_cast_to_datetime(value, dtype) # return internal types directly if is_extension_type(value): @@ -2916,8 +2947,8 @@ def _maybe_casted_values(index, labels=None): mask = labels == -1 values = values.take(labels) if mask.any(): - values, changed = com._maybe_upcast_putmask(values, mask, - np.nan) + values, changed = _maybe_upcast_putmask(values, mask, + np.nan) return values new_index = _default_index(len(new_obj)) @@ -3131,14 +3162,14 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, raise ValueError('When sorting by column, axis must be 0 (rows)') if not isinstance(by, list): by = [by] - if com.is_sequence(ascending) and len(by) != len(ascending): + if is_sequence(ascending) and len(by) != len(ascending): raise ValueError('Length of ascending (%d) != length of by (%d)' % (len(ascending), len(by))) if len(by) > 1: from pandas.core.groupby import _lexsort_indexer def trans(v): - if com.needs_i8_conversion(v): + if needs_i8_conversion(v): return v.view('i8') return v @@ -3151,7 +3182,7 @@ def trans(v): keys.append(trans(k)) indexer = _lexsort_indexer(keys, orders=ascending, na_position=na_position) - indexer = com._ensure_platform_int(indexer) + indexer = _ensure_platform_int(indexer) else: from pandas.core.groupby import _nargsort @@ -3320,7 +3351,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False, inplace=inplace, sort_remaining=sort_remaining) def _nsorted(self, columns, n, method, keep): - if not com.is_list_like(columns): + if not is_list_like(columns): columns = [columns] columns = list(columns) ser = getattr(self[columns[0]], method)(n, keep=keep) @@ -3658,28 +3689,28 @@ def combine(self, other, func, fill_value=None, overwrite=True): # if we have different dtypes, possibily promote new_dtype = this_dtype if this_dtype != other_dtype: - new_dtype = com._lcd_dtypes(this_dtype, other_dtype) + new_dtype = _lcd_dtypes(this_dtype, other_dtype) series = series.astype(new_dtype) otherSeries = otherSeries.astype(new_dtype) # see if we need to be represented as i8 (datetimelike) # try to keep us at this dtype - needs_i8_conversion = com.needs_i8_conversion(new_dtype) - if needs_i8_conversion: + needs_i8_conversion_i = needs_i8_conversion(new_dtype) + if needs_i8_conversion_i: this_dtype = new_dtype arr = func(series, otherSeries, True) else: arr = func(series, otherSeries) if do_fill: - arr = com.ensure_float(arr) + arr = _ensure_float(arr) arr[this_mask & other_mask] = NA # try to downcast back to the original dtype - if needs_i8_conversion: - arr = com._possibly_cast_to_datetime(arr, this_dtype) + if needs_i8_conversion_i: + arr = _possibly_cast_to_datetime(arr, this_dtype) else: - arr = com._possibly_downcast_to_dtype(arr, this_dtype) + arr = 
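
``_setitem_frame`` insists on an all-boolean mask, so non-boolean frames are rejected early::

    import pandas as pd

    df = pd.DataFrame({'a': [-1, 2], 'b': [3, -4]})
    df[df > 0] = 0   # fine: a boolean DataFrame mask
    # df[df + 1] = 0 raises TypeError('Must pass DataFrame with boolean values only')
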
_possibly_downcast_to_dtype(arr, this_dtype) result[col] = arr @@ -4581,7 +4612,7 @@ def _dict_round(df, decimals): yield vals def _series_round(s, decimals): - if com.is_integer_dtype(s) or com.is_float_dtype(s): + if is_integer_dtype(s) or is_float_dtype(s): return s.round(decimals) return s @@ -4592,7 +4623,7 @@ def _series_round(s, decimals): if not decimals.index.is_unique: raise ValueError("Index of decimals must be unique") new_cols = [col for col in _dict_round(self, decimals)] - elif com.is_integer(decimals): + elif is_integer(decimals): # Dispatch to Series.round new_cols = [_series_round(v, decimals) for _, v in self.iteritems()] @@ -4634,14 +4665,14 @@ def corr(self, method='pearson', min_periods=1): mat = numeric_df.values if method == 'pearson': - correl = _algos.nancorr(com._ensure_float64(mat), minp=min_periods) + correl = _algos.nancorr(_ensure_float64(mat), minp=min_periods) elif method == 'spearman': - correl = _algos.nancorr_spearman(com._ensure_float64(mat), + correl = _algos.nancorr_spearman(_ensure_float64(mat), minp=min_periods) else: if min_periods is None: min_periods = 1 - mat = com._ensure_float64(mat).T + mat = _ensure_float64(mat).T corrf = nanops.get_corr_func(method) K = len(cols) correl = np.empty((K, K), dtype=float) @@ -4696,7 +4727,7 @@ def cov(self, min_periods=None): baseCov = np.cov(mat.T) baseCov = baseCov.reshape((len(cols), len(cols))) else: - baseCov = _algos.nancorr(com._ensure_float64(mat), cov=True, + baseCov = _algos.nancorr(_ensure_float64(mat), cov=True, minp=min_periods) return self._constructor(baseCov, index=cols, columns=cols) @@ -4825,7 +4856,7 @@ def _count_level(self, level, axis=0, numeric_only=False): level = count_axis._get_level_number(level) level_index = count_axis.levels[level] - labels = com._ensure_int64(count_axis.labels[level]) + labels = _ensure_int64(count_axis.labels[level]) counts = lib.count_level_2d(mask, labels, len(level_index), axis=0) result = DataFrame(counts, index=level_index, columns=agg_axis) @@ -4906,7 +4937,7 @@ def f(x): # try to coerce to the original dtypes item by item if we can if axis == 0: - result = com._coerce_to_dtypes(result, self.dtypes) + result = _coerce_to_dtypes(result, self.dtypes) return Series(result, index=labels) @@ -5376,13 +5407,13 @@ def _prep_ndarray(values, copy=True): return np.empty((0, 0), dtype=object) def convert(v): - return com._possibly_convert_platform(v) + return _possibly_convert_platform(v) # we could have a 1-dim or 2-dim list here # this is equiv of np.asarray, but does object conversion # and platform dtype preservation try: - if com.is_list_like(values[0]) or hasattr(values[0], 'len'): + if is_list_like(values[0]) or hasattr(values[0], 'len'): values = np.array([convert(v) for v in values]) else: values = convert(values) @@ -5570,7 +5601,7 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): def convert(arr): if dtype != object and dtype != np.object: arr = lib.maybe_convert_objects(arr, try_float=coerce_float) - arr = com._possibly_cast_to_datetime(arr, dtype) + arr = _possibly_cast_to_datetime(arr, dtype) return arr arrays = [convert(arr) for arr in content] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b4bcae47cbbdf..d6e6f571be53a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8,6 +8,29 @@ import pandas.lib as lib import pandas as pd + + +from pandas.types.common import (_coerce_to_dtype, + _ensure_int64, + needs_i8_conversion, + is_scalar, + is_integer, is_bool, + is_bool_dtype, + 
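
The ``round`` dispatch shown here only touches numeric columns and accepts per-column decimals::

    import pandas as pd

    df = pd.DataFrame({'a': [1.234, 2.345], 'b': ['x', 'y']})
    df.round(1)          # 'a' rounded, non-numeric 'b' passed through
    df.round({'a': 2})   # per-column decimals; a Series of decimals needs a unique index
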
is_numeric_dtype, + is_datetime64_dtype, + is_timedelta64_dtype, + is_list_like, + is_dict_like, + is_re_compilable) +from pandas.types.cast import _maybe_promote, _maybe_upcast_putmask +from pandas.types.missing import isnull, notnull +from pandas.types.generic import ABCSeries, ABCPanel + +from pandas.core.common import (_values_from_object, + _maybe_box_datetimelike, + SettingWithCopyError, SettingWithCopyWarning, + AbstractMethodError) + from pandas.core.base import PandasObject from pandas.core.index import (Index, MultiIndex, _ensure_index, InvalidIndexError) @@ -25,11 +48,6 @@ from pandas.compat.numpy import function as nv from pandas.compat import (map, zip, lrange, string_types, isidentifier, set_function_name) -from pandas.core.common import (isnull, notnull, is_list_like, - _values_from_object, _maybe_promote, - _maybe_box_datetimelike, ABCSeries, - SettingWithCopyError, SettingWithCopyWarning, - AbstractMethodError) import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.core import config @@ -46,10 +64,6 @@ Name or list of names which refer to the axis items.""") -def is_dictlike(x): - return isinstance(x, (dict, com.ABCSeries)) - - def _single_replace(self, to_replace, method, inplace, limit): if self.ndim != 1: raise TypeError('cannot replace {0} with method {1} on a {2}' @@ -116,7 +130,7 @@ def _validate_dtype(self, dtype): """ validate the passed dtype """ if dtype is not None: - dtype = com._coerce_to_dtype(dtype) + dtype = _coerce_to_dtype(dtype) # a compound dtype if dtype.kind == 'V': @@ -310,7 +324,7 @@ def _from_axes(cls, data, axes, **kwargs): def _get_axis_number(self, axis): axis = self._AXIS_ALIASES.get(axis, axis) - if com.is_integer(axis): + if is_integer(axis): if axis in self._AXIS_NAMES: return axis else: @@ -717,8 +731,8 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): 1 2 5 2 3 6 """ - non_mapper = lib.isscalar(mapper) or (com.is_list_like(mapper) and not - com.is_dict_like(mapper)) + non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not + is_dict_like(mapper)) if non_mapper: return self._set_axis_name(mapper, axis=axis) else: @@ -912,7 +926,7 @@ def bool(self): v = self.squeeze() if isinstance(v, (bool, np.bool_)): return bool(v) - elif lib.isscalar(v): + elif is_scalar(v): raise ValueError("bool cannot act on a non-boolean single element " "{0}".format(self.__class__.__name__)) @@ -1764,10 +1778,10 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True): else: return self.take(loc, axis=axis, convert=True) - if not lib.isscalar(loc): + if not is_scalar(loc): new_index = self.index[loc] - if lib.isscalar(loc): + if is_scalar(loc): new_values = self._data.fast_xs(loc) # may need to box a datelike-scalar @@ -2340,7 +2354,7 @@ def _reindex_with_indexers(self, reindexers, fill_value=np.nan, copy=False, index = _ensure_index(index) if indexer is not None: - indexer = com._ensure_int64(indexer) + indexer = _ensure_int64(indexer) # TODO: speed up on homogeneous DataFrame objects new_data = new_data.reindex_indexer(index, indexer, axis=baxis, @@ -3202,10 +3216,10 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, return self if self.ndim == 1: - if isinstance(value, (dict, com.ABCSeries)): + if isinstance(value, (dict, ABCSeries)): from pandas import Series value = Series(value) - elif not com.is_list_like(value): + elif not is_list_like(value): pass else: raise ValueError("invalid fill value with a %s" % @@ -3215,7 +3229,7 @@ def 
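
``fillna`` accepts a dict or Series of per-column fill values, but only along the columns::

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': [1, np.nan], 'B': [np.nan, 'x']})
    df.fillna({'A': 0, 'B': ''})   # filled column by column
    # passing a dict together with axis=1 raises NotImplementedError
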
fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, downcast=downcast) - elif isinstance(value, (dict, com.ABCSeries)): + elif isinstance(value, (dict, ABCSeries)): if axis == 1: raise NotImplementedError('Currently only can fill ' 'with dict/Series column ' @@ -3228,7 +3242,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, obj = result[k] obj.fillna(v, limit=limit, inplace=True) return result - elif not com.is_list_like(value): + elif not is_list_like(value): new_data = self._data.fillna(value=value, limit=limit, inplace=inplace, downcast=downcast) @@ -3354,7 +3368,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, and play with this method to gain intuition about how it works. """ - if not com.is_bool(regex) and to_replace is not None: + if not is_bool(regex) and to_replace is not None: raise AssertionError("'to_replace' must be 'None' if 'regex' is " "not a bool") if axis is not None: @@ -3367,15 +3381,15 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, if value is None: # passing a single value that is scalar like # when value is None (GH5319), for compat - if not is_dictlike(to_replace) and not is_dictlike(regex): + if not is_dict_like(to_replace) and not is_dict_like(regex): to_replace = [to_replace] if isinstance(to_replace, (tuple, list)): return _single_replace(self, to_replace, method, inplace, limit) - if not is_dictlike(to_replace): - if not is_dictlike(regex): + if not is_dict_like(to_replace): + if not is_dict_like(regex): raise TypeError('If "to_replace" and "value" are both None' ' and "to_replace" is not a list, then ' 'regex must be a mapping') @@ -3385,7 +3399,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, items = list(compat.iteritems(to_replace)) keys, values = zip(*items) - are_mappings = [is_dictlike(v) for v in values] + are_mappings = [is_dict_like(v) for v in values] if any(are_mappings): if not all(are_mappings): @@ -3418,8 +3432,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, return self new_data = self._data - if is_dictlike(to_replace): - if is_dictlike(value): # {'A' : NA} -> {'A' : 0} + if is_dict_like(to_replace): + if is_dict_like(value): # {'A' : NA} -> {'A' : 0} res = self if inplace else self.copy() for c, src in compat.iteritems(to_replace): if c in value and c in self: @@ -3429,7 +3443,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, return None if inplace else res # {'A': NA} -> 0 - elif not com.is_list_like(value): + elif not is_list_like(value): for k, src in compat.iteritems(to_replace): if k in self: new_data = new_data.replace(to_replace=src, @@ -3441,8 +3455,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, raise TypeError('value argument must be scalar, dict, or ' 'Series') - elif com.is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] - if com.is_list_like(value): + elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] + if is_list_like(value): if len(to_replace) != len(value): raise ValueError('Replacement lists must match ' 'in length. 
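
The branches above give ``replace`` its dict, list, and regex forms; the common cases::

    import pandas as pd

    df = pd.DataFrame({'A': [0, 1, 2], 'B': [5, 6, 7]})
    df.replace({'A': {0: 100}})    # nested dict: a per-column mapping
    df.replace([1, 2], [10, 20])   # paired lists must match in length
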
Expecting %d got %d ' % @@ -3458,8 +3472,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, value=value, inplace=inplace, regex=regex) elif to_replace is None: - if not (com.is_re_compilable(regex) or - com.is_list_like(regex) or is_dictlike(regex)): + if not (is_re_compilable(regex) or + is_list_like(regex) or is_dict_like(regex)): raise TypeError("'regex' must be a string or a compiled " "regular expression or a list or dict of " "strings or regular expressions, you " @@ -3470,7 +3484,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, else: # dest iterable dict-like - if is_dictlike(value): # NA -> {'A' : 0, 'B' : -1} + if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} new_data = self._data for k, v in compat.iteritems(value): @@ -3480,7 +3494,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, inplace=inplace, regex=regex) - elif not com.is_list_like(value): # NA -> 0 + elif not is_list_like(value): # NA -> 0 new_data = self._data.replace(to_replace=to_replace, value=value, inplace=inplace, regex=regex) @@ -3792,14 +3806,14 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): 3 0.230930 0.000000 4 1.100000 0.570967 """ - if isinstance(self, com.ABCPanel): + if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") axis = nv.validate_clip_with_axis(axis, args, kwargs) # GH 2747 (arguments were reversed) if lower is not None and upper is not None: - if lib.isscalar(lower) and lib.isscalar(upper): + if is_scalar(lower) and is_scalar(upper): lower, upper = min(lower, upper), max(lower, upper) result = self @@ -4485,10 +4499,12 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, new_other = np.array(other, dtype=self.dtype) except ValueError: new_other = np.array(other) + except TypeError: + new_other = other # we can end up comparing integers and m8[ns] # which is a numpy no no - is_i8 = com.needs_i8_conversion(self.dtype) + is_i8 = needs_i8_conversion(self.dtype) if is_i8: matches = False else: @@ -4497,7 +4513,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, if matches is False or not matches.all(): # coerce other to a common dtype if we can - if com.needs_i8_conversion(self.dtype): + if needs_i8_conversion(self.dtype): try: other = np.array(other, dtype=self.dtype) except: @@ -4550,7 +4566,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, dtype, fill_value = _maybe_promote(other.dtype) new_other = np.empty(len(icond), dtype=dtype) new_other.fill(fill_value) - com._maybe_upcast_putmask(new_other, icond, other) + _maybe_upcast_putmask(new_other, icond, other) other = new_other else: @@ -5058,7 +5074,7 @@ def describe_categorical_1d(data): if result[1] > 0: top, freq = objcounts.index[0], objcounts.iloc[0] - if com.is_datetime64_dtype(data): + if is_datetime64_dtype(data): asint = data.dropna().values.view('i8') names += ['top', 'freq', 'first', 'last'] result += [lib.Timestamp(top), freq, @@ -5071,11 +5087,11 @@ def describe_categorical_1d(data): return pd.Series(result, index=names, name=data.name) def describe_1d(data): - if com.is_bool_dtype(data): + if is_bool_dtype(data): return describe_categorical_1d(data) - elif com.is_numeric_dtype(data): + elif is_numeric_dtype(data): return describe_numeric_1d(data) - elif com.is_timedelta64_dtype(data): + elif is_timedelta64_dtype(data): return describe_numeric_1d(data) else: return describe_categorical_1d(data) @@ 
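
Note the GH 2747 fix visible above: in this code, scalar ``clip`` bounds given in the wrong order are swapped rather than rejected::

    import pandas as pd

    s = pd.Series([-2.0, 0.0, 2.0])
    s.clip(lower=1, upper=-1)   # same as s.clip(-1, 1): the bounds are min/max'd
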
-5162,7 +5178,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1) if freq is None: - mask = com.isnull(_values_from_object(self)) + mask = isnull(_values_from_object(self)) np.putmask(rs.values, mask, np.nan) return rs diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 077acc1e81444..6179857978b7b 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -13,6 +13,25 @@ from pandas import compat from pandas.compat.numpy import function as nv from pandas.compat.numpy import _np_version_under1p8 + +from pandas.types.common import (_DATELIKE_DTYPES, + is_numeric_dtype, + is_timedelta64_dtype, is_datetime64_dtype, + is_categorical_dtype, + is_datetime_or_timedelta_dtype, + is_bool, is_integer_dtype, + is_complex_dtype, + is_bool_dtype, + is_scalar, + _ensure_float64, + _ensure_platform_int, + _ensure_int64, + _ensure_object, + _ensure_float) +from pandas.types.cast import _possibly_downcast_to_dtype +from pandas.types.missing import isnull, notnull, _maybe_fill + +from pandas.core.common import _values_from_object, AbstractMethodError from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) from pandas.core.categorical import Categorical @@ -30,14 +49,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com -from pandas.core.common import(_possibly_downcast_to_dtype, isnull, - notnull, _DATELIKE_DTYPES, is_numeric_dtype, - is_timedelta64_dtype, is_datetime64_dtype, - is_categorical_dtype, _values_from_object, - is_datetime_or_timedelta_dtype, is_bool, - is_bool_dtype, AbstractMethodError, - _maybe_fill) -from pandas.core.config import option_context, is_callable +from pandas.core.config import option_context import pandas.lib as lib from pandas.lib import Timestamp import pandas.tslib as tslib @@ -662,7 +674,7 @@ def apply(self, func, *args, **kwargs): # resolve functions to their callable functions prior, this # wouldn't be needed if args or kwargs: - if is_callable(func): + if callable(func): @wraps(func) def f(g): @@ -752,7 +764,7 @@ def _try_cast(self, result, obj): else: dtype = obj.dtype - if not lib.isscalar(result): + if not is_scalar(result): result = _possibly_downcast_to_dtype(result, dtype) return result @@ -817,7 +829,7 @@ def _python_agg_general(self, func, *args, **kwargs): # since we are masking, make sure that we have a float object values = result if is_numeric_dtype(values.dtype): - values = com.ensure_float(values) + values = _ensure_float(values) output[name] = self._try_cast(values[mask], result) @@ -1595,7 +1607,7 @@ def size(self): """ ids, _, ngroup = self.group_info - ids = com._ensure_platform_int(ids) + ids = _ensure_platform_int(ids) out = np.bincount(ids[ids != -1], minlength=ngroup or None) return Series(out, index=self.result_index, dtype='int64') @@ -1631,7 +1643,7 @@ def group_info(self): comp_ids, obs_group_ids = self._get_compressed_labels() ngroups = len(obs_group_ids) - comp_ids = com._ensure_int64(comp_ids) + comp_ids = _ensure_int64(comp_ids) return comp_ids, obs_group_ids, ngroups def _get_compressed_labels(self): @@ -1671,7 +1683,7 @@ def get_group_levels(self): name_list = [] for ping, labels in zip(self.groupings, self.recons_labels): - labels = com._ensure_platform_int(labels) + labels = _ensure_platform_int(labels) levels = ping.group_index.take(labels) name_list.append(levels) @@ -1780,11 +1792,11 @@ def _cython_operation(self, kind, values, how, 
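
``_cython_operation`` routes boolean blocks through float64 and keeps integers where it can, which shows up in aggregation result dtypes::

    import pandas as pd

    df = pd.DataFrame({'k': [1, 1, 2], 'v': [1, 2, 3]})
    df.groupby('k')['v'].sum().dtype    # int64 is preserved
    df.groupby('k')['v'].mean().dtype   # float64
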
axis): values = values.view('int64') is_numeric = True elif is_bool_dtype(values.dtype): - values = _algos.ensure_float64(values) - elif com.is_integer_dtype(values): + values = _ensure_float64(values) + elif is_integer_dtype(values): values = values.astype('int64', copy=False) - elif is_numeric and not com.is_complex_dtype(values): - values = _algos.ensure_float64(values) + elif is_numeric and not is_complex_dtype(values): + values = _ensure_float64(values) else: values = values.astype(object) @@ -1793,7 +1805,7 @@ def _cython_operation(self, kind, values, how, axis): kind, how, values, is_numeric) except NotImplementedError: if is_numeric: - values = _algos.ensure_float64(values) + values = _ensure_float64(values) func, dtype_str = self._get_cython_function( kind, how, values, is_numeric) else: @@ -1821,7 +1833,7 @@ def _cython_operation(self, kind, values, how, axis): result = self._transform( result, accum, values, labels, func, is_numeric) - if com.is_integer_dtype(result): + if is_integer_dtype(result): if len(result[result == tslib.iNaT]) > 0: result = result.astype('float64') result[result == tslib.iNaT] = np.nan @@ -1834,7 +1846,7 @@ def _cython_operation(self, kind, values, how, axis): result, (counts > 0).view(np.uint8)) except ValueError: result = lib.row_bool_subset_object( - com._ensure_object(result), + _ensure_object(result), (counts > 0).view(np.uint8)) else: result = result[counts > 0] @@ -1996,7 +2008,7 @@ def generate_bins_generic(values, binner, closed): class BinGrouper(BaseGrouper): def __init__(self, bins, binlabels, filter_empty=False, mutated=False): - self.bins = com._ensure_int64(bins) + self.bins = _ensure_int64(bins) self.binlabels = _ensure_index(binlabels) self._filter_empty_groups = filter_empty self.mutated = mutated @@ -2061,7 +2073,7 @@ def group_info(self): obs_group_ids = np.arange(ngroups) rep = np.diff(np.r_[0, self.bins]) - rep = com._ensure_platform_int(rep) + rep = _ensure_platform_int(rep) if ngroups == len(self.bins): comp_ids = np.repeat(np.arange(ngroups), rep) else: @@ -2449,7 +2461,7 @@ def is_in_obj(gpr): def _is_label_like(val): return (isinstance(val, compat.string_types) or - (val is not None and lib.isscalar(val))) + (val is not None and is_scalar(val))) def _convert_grouper(axis, grouper): @@ -2671,7 +2683,7 @@ def _aggregate_multiple_funcs(self, arg, _level): results[name] = obj.aggregate(func) if isinstance(list(compat.itervalues(results))[0], - com.ABCDataFrame): + DataFrame): # let higher level handle if _level: @@ -2870,9 +2882,9 @@ def nunique(self, dropna=True): 'val.dtype must be object, got %s' % val.dtype val, _ = algos.factorize(val, sort=False) sorter = np.lexsort((val, ids)) - isnull = lambda a: a == -1 + _isnull = lambda a: a == -1 else: - isnull = com.isnull + _isnull = isnull ids, val = ids[sorter], val[sorter] @@ -2882,7 +2894,7 @@ def nunique(self, dropna=True): inc = np.r_[1, val[1:] != val[:-1]] # 1st item of each group is a new unique observation - mask = isnull(val) + mask = _isnull(val) if dropna: inc[idx] = 1 inc[mask] = 0 @@ -2998,8 +3010,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, mi = MultiIndex(levels=levels, labels=labels, names=names, verify_integrity=False) - if com.is_integer_dtype(out): - out = com._ensure_int64(out) + if is_integer_dtype(out): + out = _ensure_int64(out) return Series(out, index=mi, name=self.name) # for compat. 
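
The ``nunique`` and ``count`` paths above treat missing values consistently::

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'g': ['a', 'a', 'b'], 'v': [1.0, 1.0, np.nan]})
    df.groupby('g')['v'].nunique()   # a -> 1, b -> 0 (dropna=True by default)
    df.groupby('g')['v'].count()     # NaN excluded: a -> 2, b -> 0
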
with algos.value_counts need to ensure every @@ -3029,8 +3041,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, mi = MultiIndex(levels=levels, labels=labels, names=names, verify_integrity=False) - if com.is_integer_dtype(out): - out = com._ensure_int64(out) + if is_integer_dtype(out): + out = _ensure_int64(out) return Series(out, index=mi, name=self.name) def count(self): @@ -3039,7 +3051,7 @@ def count(self): val = self.obj.get_values() mask = (ids != -1) & ~isnull(val) - ids = com._ensure_platform_int(ids) + ids = _ensure_platform_int(ids) out = np.bincount(ids[mask], minlength=ngroups or None) return Series(out, @@ -3616,7 +3628,7 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa pass # interpret the result of the filter - if is_bool(res) or (lib.isscalar(res) and isnull(res)): + if is_bool(res) or (is_scalar(res) and isnull(res)): if res and notnull(res): indices.append(self._get_index(name)) else: @@ -3813,7 +3825,7 @@ def count(self): """ Compute count of group, excluding missing values """ from functools import partial from pandas.lib import count_level_2d - from pandas.core.common import _isnull_ndarraylike as isnull + from pandas.types.missing import _isnull_ndarraylike as isnull data, _ = self._get_data_to_aggregate() ids, _, ngroups = self.grouper.group_info @@ -3934,7 +3946,7 @@ class DataSplitter(object): def __init__(self, data, labels, ngroups, axis=0): self.data = data - self.labels = com._ensure_int64(labels) + self.labels = _ensure_int64(labels) self.ngroups = ngroups self.axis = axis @@ -4115,7 +4127,7 @@ def loop(labels, shape): def maybe_lift(lab, size): # pormote nan values return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) - labels = map(com._ensure_int64, labels) + labels = map(_ensure_int64, labels) if not xnull: labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) @@ -4331,9 +4343,9 @@ def _get_group_index_sorter(group_index, ngroups): alpha = 0.0 # taking complexities literally; there may be beta = 1.0 # some room for fine-tuning these parameters if alpha + beta * ngroups < count * np.log(count): - sorter, _ = _algos.groupsort_indexer(com._ensure_int64(group_index), + sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index), ngroups) - return com._ensure_platform_int(sorter) + return _ensure_platform_int(sorter) else: return group_index.argsort(kind='mergesort') @@ -4348,7 +4360,7 @@ def _compress_group_index(group_index, sort=True): size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) table = _hash.Int64HashTable(size_hint) - group_index = com._ensure_int64(group_index) + group_index = _ensure_int64(group_index) # note, group labels come out ascending (ie, 1,2,3 etc) comp_ids, obs_group_ids = table.get_labels_groupby(group_index) @@ -4390,7 +4402,7 @@ def _groupby_indices(values): _, counts = _hash.value_count_scalar64(codes, False) else: reverse, codes, counts = _algos.group_labels( - _values_from_object(com._ensure_object(values))) + _values_from_object(_ensure_object(values))) return _algos.groupby_indices(reverse, codes, counts) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9485f50ed07f1..0cba8308c1c53 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,17 +1,24 @@ # pylint: disable=W0223 -from pandas.core.index import Index, MultiIndex +import numpy as np from pandas.compat import range, zip import pandas.compat as compat +from pandas.types.generic import ABCDataFrame, ABCPanel, ABCSeries +from pandas.types.common import (is_integer_dtype, + 
is_integer, is_float, + is_categorical_dtype, + is_list_like, + is_sequence, + is_scalar, + _ensure_platform_int) +from pandas.types.missing import isnull, _infer_fill_value + +from pandas.core.index import Index, MultiIndex + import pandas.core.common as com -import pandas.lib as lib -from pandas.core.common import (is_bool_indexer, is_integer_dtype, - _asarray_tuplesafe, is_list_like, isnull, - is_null_slice, is_full_slice, ABCSeries, - ABCDataFrame, ABCPanel, is_float, - _values_from_object, _infer_fill_value, - is_integer) -import numpy as np +from pandas.core.common import (is_bool_indexer, _asarray_tuplesafe, + is_null_slice, is_full_slice, + _values_from_object) # the supported indexers @@ -67,7 +74,7 @@ def __getitem__(self, key): key = tuple(com._apply_if_callable(x, self.obj) for x in key) try: values = self.obj.get_value(*key) - if lib.isscalar(values): + if is_scalar(values): return values except Exception: pass @@ -625,7 +632,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): # we have a frame, with multiple indexers on both axes; and a # series, so need to broadcast (see GH5206) if (sum_aligners == self.ndim and - all([com.is_sequence(_) for _ in indexer])): + all([is_sequence(_) for _ in indexer])): ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values # single indexer @@ -639,7 +646,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): ax = obj.axes[i] # multiple aligners (or null slices) - if com.is_sequence(idx) or isinstance(idx, slice): + if is_sequence(idx) or isinstance(idx, slice): if single_aligner and is_null_slice(idx): continue new_ix = ax[idx] @@ -685,7 +692,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): return ser - elif lib.isscalar(indexer): + elif is_scalar(indexer): ax = self.obj._get_axis(1) if ser.index.equals(ax): @@ -710,7 +717,7 @@ def _align_frame(self, indexer, df): sindexers = [] for i, ix in enumerate(indexer): ax = self.obj.axes[i] - if com.is_sequence(ix) or isinstance(ix, slice): + if is_sequence(ix) or isinstance(ix, slice): if idx is None: idx = ax[ix].ravel() elif cols is None: @@ -761,7 +768,7 @@ def _align_frame(self, indexer, df): val = df.reindex(index=ax)._values return val - elif lib.isscalar(indexer) and is_panel: + elif is_scalar(indexer) and is_panel: idx = self.obj.axes[1] cols = self.obj.axes[2] @@ -857,7 +864,7 @@ def _convert_for_reindex(self, key, axis=0): keyarr = _asarray_tuplesafe(key) if is_integer_dtype(keyarr) and not labels.is_integer(): - keyarr = com._ensure_platform_int(keyarr) + keyarr = _ensure_platform_int(keyarr) return labels.take(keyarr) return keyarr @@ -968,7 +975,7 @@ def _getitem_nested_tuple(self, tup): axis += 1 # if we have a scalar, we are done - if lib.isscalar(obj) or not hasattr(obj, 'ndim'): + if is_scalar(obj) or not hasattr(obj, 'ndim'): break # has the dim of the obj changed? 
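[editor's note, not part of the patch] Every hunk in this file, and in most of the files that follow, applies the same mechanical substitution: type-introspection and missing-value helpers that used to be reached through pandas.core.common or pandas.lib are now imported directly from the new pandas.types.common, pandas.types.missing, and pandas.types.cast modules. No behaviour change appears intended. A minimal sketch of the before/after, using only names visible in the surrounding hunks:

    # before this patch (0.18.x layout)
    import pandas.core.common as com
    import pandas.lib as lib

    com.is_list_like([1, 2])     # True
    lib.isscalar(3.14)           # True
    com.isnull(float('nan'))     # True

    # after this patch
    from pandas.types.common import is_list_like, is_scalar
    from pandas.types.missing import isnull

    is_list_like([1, 2])         # True
    is_scalar(3.14)              # True
    isnull(float('nan'))         # True

The handful of genuine renames ride along with the move: lib.isscalar becomes is_scalar, com.ensure_float becomes _ensure_float, and the private is_callable check is replaced by the callable builtin.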
@@ -1038,7 +1045,7 @@ def _getitem_iterable(self, key, axis=0): # asarray can be unsafe, NumPy strings are weird keyarr = _asarray_tuplesafe(key) - if com.is_categorical_dtype(labels): + if is_categorical_dtype(labels): keyarr = labels._shallow_copy(keyarr) # have the index handle the indexer and possibly return @@ -1799,7 +1806,7 @@ def check_bool_indexer(ax, key): result = key if isinstance(key, ABCSeries) and not key.index.equals(ax): result = result.reindex(ax) - mask = com.isnull(result._values) + mask = isnull(result._values) if mask.any(): raise IndexingError('Unalignable boolean Series key provided') @@ -1941,9 +1948,9 @@ def _non_reducing_slice(slice_): def pred(part): # true when slice does *not* reduce - return isinstance(part, slice) or com.is_list_like(part) + return isinstance(part, slice) or is_list_like(part) - if not com.is_list_like(slice_): + if not is_list_like(slice_): if not isinstance(slice_, slice): # a 1-d slice, like df.loc[1] slice_ = [[slice_]] diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1ea567f15cb7f..363ac8249eb06 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -10,29 +10,48 @@ from pandas.core.base import PandasObject -from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE, - _TD_DTYPE, ABCSeries, is_list_like, - _infer_dtype_from_scalar, is_null_slice, - is_dtype_equal, is_null_datelike_scalar, - _maybe_promote, is_timedelta64_dtype, - is_datetime64_dtype, is_datetimetz, is_sparse, - array_equivalent, _is_na_compat, - _maybe_convert_string_to_object, - _maybe_convert_scalar, - is_categorical, is_datetimelike_v_numeric, - is_numeric_v_string_like, is_extension_type) +from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype +from pandas.types.common import (_TD_DTYPE, _NS_DTYPE, + _ensure_int64, _ensure_platform_int, + is_integer, + is_dtype_equal, + is_timedelta64_dtype, + is_datetime64_dtype, is_datetimetz, is_sparse, + is_categorical, is_categorical_dtype, + is_integer_dtype, + is_datetime64tz_dtype, + is_object_dtype, + is_datetimelike_v_numeric, + is_numeric_v_string_like, is_extension_type, + is_list_like, + is_re, + is_re_compilable, + is_scalar, + _get_dtype) +from pandas.types.cast import (_possibly_downcast_to_dtype, + _maybe_convert_string_to_object, + _maybe_upcast, + _maybe_convert_scalar, _maybe_promote, + _infer_dtype_from_scalar, + _soft_convert_objects, + _possibly_convert_objects, + _astype_nansafe) +from pandas.types.missing import (isnull, array_equivalent, + _is_na_compat, + is_null_datelike_scalar) +import pandas.types.concat as _concat + +from pandas.types.generic import ABCSeries +from pandas.core.common import is_null_slice import pandas.core.algorithms as algos -from pandas.types.api import DatetimeTZDtype from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer from pandas.core.categorical import Categorical, maybe_to_categorical from pandas.tseries.index import DatetimeIndex from pandas.formats.printing import pprint_thing -import pandas.core.common as com -import pandas.types.concat as _concat + import pandas.core.missing as missing -import pandas.core.convert as convert from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib import pandas.tslib as tslib @@ -112,8 +131,8 @@ def is_categorical_astype(self, dtype): validate that we have a astypeable to categorical, returns a boolean if we are a categorical """ - if 
com.is_categorical_dtype(dtype): - if dtype == com.CategoricalDtype(): + if is_categorical_dtype(dtype): + if dtype == CategoricalDtype(): return True # this is a pd.Categorical, but is not @@ -137,7 +156,7 @@ def get_values(self, dtype=None): return an internal format, currently just the ndarray this is often overriden to handle to_dense like operations """ - if com.is_object_dtype(dtype): + if is_object_dtype(dtype): return self.values.astype(object) return self.values @@ -481,7 +500,7 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None, values = self.get_values(dtype=dtype) # _astype_nansafe works fine with 1-d only - values = com._astype_nansafe(values.ravel(), dtype, copy=True) + values = _astype_nansafe(values.ravel(), dtype, copy=True) values = values.reshape(self.shape) newb = make_block(values, placement=self.mgr_locs, dtype=dtype, @@ -651,7 +670,7 @@ def setitem(self, indexer, value, mgr=None): # cast the values to a type that can hold nan (if necessary) if not self._can_hold_element(value): - dtype, _ = com._maybe_promote(arr_value.dtype) + dtype, _ = _maybe_promote(arr_value.dtype) values = values.astype(dtype) transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) @@ -684,7 +703,7 @@ def _is_scalar_indexer(indexer): if arr_value.ndim == 1: if not isinstance(indexer, tuple): indexer = tuple([indexer]) - return all([lib.isscalar(idx) for idx in indexer]) + return all([is_scalar(idx) for idx in indexer]) return False def _is_empty_indexer(indexer): @@ -724,7 +743,7 @@ def _is_empty_indexer(indexer): if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, value.dtype): dtype = value.dtype - elif lib.isscalar(value): + elif is_scalar(value): dtype, _ = _infer_dtype_from_scalar(value) else: dtype = 'infer' @@ -838,7 +857,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, n = np.array(new) # type of the new block - dtype, _ = com._maybe_promote(n.dtype) + dtype, _ = _maybe_promote(n.dtype) # we need to explicitly astype here to make a copy n = n.astype(dtype) @@ -1027,7 +1046,7 @@ def shift(self, periods, axis=0, mgr=None): # convert integer to float if necessary. 
need to do a lot more than # that, handle boolean etc also - new_values, fill_value = com._maybe_upcast(self.values) + new_values, fill_value = _maybe_upcast(self.values) # make sure array sent to np.roll is c_contiguous f_ordered = new_values.flags.f_contiguous @@ -1036,7 +1055,7 @@ def shift(self, periods, axis=0, mgr=None): axis = new_values.ndim - axis - 1 if np.prod(new_values.shape): - new_values = np.roll(new_values, com._ensure_platform_int(periods), + new_values = np.roll(new_values, _ensure_platform_int(periods), axis=axis) axis_indexer = [slice(None)] * self.ndim @@ -1306,7 +1325,7 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None): from pandas import Float64Index is_empty = values.shape[axis] == 0 - if com.is_list_like(qs): + if is_list_like(qs): ax = Float64Index(qs) if is_empty: @@ -1350,7 +1369,7 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None): ndim = getattr(result, 'ndim', None) or 0 result = self._try_coerce_result(result) - if lib.isscalar(result): + if is_scalar(result): return ax, self.make_block_scalar(result) return ax, make_block(result, placement=np.arange(len(result)), @@ -1591,7 +1610,7 @@ def _can_hold_element(self, element): tipo = element.dtype.type return (issubclass(tipo, np.integer) and not issubclass(tipo, (np.datetime64, np.timedelta64))) - return com.is_integer(element) + return is_integer(element) def _try_cast(self, element): try: @@ -1600,7 +1619,7 @@ def _try_cast(self, element): return element def should_store(self, value): - return com.is_integer_dtype(value) and value.dtype == self.dtype + return is_integer_dtype(value) and value.dtype == self.dtype class DatetimeLikeBlockMixin(object): @@ -1621,7 +1640,7 @@ def get_values(self, dtype=None): """ return object dtype as boxed values, such as Timestamps/Timedelta """ - if com.is_object_dtype(dtype): + if is_object_dtype(dtype): return lib.map_infer(self.values.ravel(), self._box_func).reshape(self.values.shape) return self.values @@ -1641,7 +1660,7 @@ def fillna(self, value, **kwargs): # allow filling with integers to be # interpreted as seconds - if not isinstance(value, np.timedelta64) and com.is_integer(value): + if not isinstance(value, np.timedelta64) and is_integer(value): value = Timedelta(value, unit='s') return super(TimeDeltaBlock, self).fillna(value, **kwargs) @@ -1795,10 +1814,10 @@ def convert(self, *args, **kwargs): new_style |= kw in kwargs if new_style: - fn = convert._soft_convert_objects + fn = _soft_convert_objects fn_inputs = new_inputs else: - fn = convert._possibly_convert_objects + fn = _possibly_convert_objects fn_inputs = ['convert_dates', 'convert_numeric', 'convert_timedeltas'] fn_inputs += ['copy'] @@ -1884,15 +1903,15 @@ def should_store(self, value): def replace(self, to_replace, value, inplace=False, filter=None, regex=False, convert=True, mgr=None): - to_rep_is_list = com.is_list_like(to_replace) - value_is_list = com.is_list_like(value) + to_rep_is_list = is_list_like(to_replace) + value_is_list = is_list_like(value) both_lists = to_rep_is_list and value_is_list either_list = to_rep_is_list or value_is_list result_blocks = [] blocks = [self] - if not either_list and com.is_re(to_replace): + if not either_list and is_re(to_replace): return self._replace_single(to_replace, value, inplace=inplace, filter=filter, regex=True, convert=convert, mgr=mgr) @@ -1930,10 +1949,10 @@ def replace(self, to_replace, value, inplace=False, filter=None, def _replace_single(self, to_replace, value, inplace=False, filter=None, regex=False, convert=True, 
mgr=None): # to_replace is regex compilable - to_rep_re = regex and com.is_re_compilable(to_replace) + to_rep_re = regex and is_re_compilable(to_replace) # regex is regex compilable - regex_re = com.is_re_compilable(regex) + regex_re = is_re_compilable(regex) # only one will survive if to_rep_re and regex_re: @@ -2046,7 +2065,7 @@ def _try_coerce_result(self, result): # GH12564: CategoricalBlock is 1-dim only # while returned results could be any dim - if ((not com.is_categorical_dtype(result)) and + if ((not is_categorical_dtype(result)) and isinstance(result, np.ndarray)): result = _block_shape(result, ndim=self.ndim) @@ -2151,7 +2170,7 @@ def _astype(self, dtype, mgr=None, **kwargs): """ # if we are passed a datetime64[ns, tz] - if com.is_datetime64tz_dtype(dtype): + if is_datetime64tz_dtype(dtype): dtype = DatetimeTZDtype(dtype) values = self.values @@ -2167,7 +2186,7 @@ def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) return element.dtype == _NS_DTYPE or element.dtype == np.int64 - return (com.is_integer(element) or isinstance(element, datetime) or + return (is_integer(element) or isinstance(element, datetime) or isnull(element)) def _try_cast(self, element): @@ -2209,7 +2228,7 @@ def _try_coerce_args(self, values, other): "naive Block") other_mask = isnull(other) other = other.asm8.view('i8') - elif hasattr(other, 'dtype') and com.is_integer_dtype(other): + elif hasattr(other, 'dtype') and is_integer_dtype(other): other = other.view('i8') else: try: @@ -2315,7 +2334,7 @@ def external_values(self): def get_values(self, dtype=None): # return object dtype as Timestamps with the zones - if com.is_object_dtype(dtype): + if is_object_dtype(dtype): f = lambda x: lib.Timestamp(x, tz=self.values.tz) return lib.map_infer( self.values.ravel(), f).reshape(self.values.shape) @@ -2561,7 +2580,7 @@ def shift(self, periods, axis=0, mgr=None): new_values = self.values.to_dense().take(indexer) # convert integer to float if necessary. 
need to do a lot more than # that, handle boolean etc also - new_values, fill_value = com._maybe_upcast(new_values) + new_values, fill_value = _maybe_upcast(new_values) if periods > 0: new_values[:periods] = fill_value else: @@ -3491,7 +3510,7 @@ def get(self, item, fastpath=True): indexer = np.arange(len(self.items))[isnull(self.items)] # allow a single nan location indexer - if not lib.isscalar(indexer): + if not is_scalar(indexer): if len(indexer) == 1: loc = indexer.item() else: @@ -3823,7 +3842,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_tuple[0] is None: - _, fill_value = com._maybe_promote(blk.dtype) + _, fill_value = _maybe_promote(blk.dtype) fill_tuple = (fill_value, ) return [blk.take_nd(slobj, axis=0, @@ -3881,7 +3900,7 @@ def _make_na_block(self, placement, fill_value=None): block_shape = list(self.shape) block_shape[0] = len(placement) - dtype, fill_value = com._infer_dtype_from_scalar(fill_value) + dtype, fill_value = _infer_dtype_from_scalar(fill_value) block_values = np.empty(block_shape, dtype=dtype) block_values.fill(fill_value) return make_block(block_values, placement=placement) @@ -4560,7 +4579,7 @@ def _possibly_compare(a, b, op): else: result = op(a, b) - if lib.isscalar(result) and (is_a_array or is_b_array): + if is_scalar(result) and (is_a_array or is_b_array): type_names = [type(a).__name__, type(b).__name__] if is_a_array: @@ -4611,7 +4630,7 @@ def _factor_indexer(shape, labels): expanded label indexer """ mult = np.array(shape)[::-1].cumprod()[::-1] - return com._ensure_platform_int( + return _ensure_platform_int( np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T) @@ -4631,7 +4650,7 @@ def _get_blkno_placements(blknos, blk_count, group=True): """ - blknos = com._ensure_int64(blknos) + blknos = _ensure_int64(blknos) # FIXME: blk_count is unused, but it may avoid the use of dicts in cython for blkno, indexer in lib.get_blkno_indexers(blknos, group): @@ -4721,7 +4740,7 @@ def _putmask_smart(v, m, n): pass # change the dtype - dtype, _ = com._maybe_promote(n.dtype) + dtype, _ = _maybe_promote(n.dtype) nv = v.astype(dtype) try: nv[m] = n[m] @@ -4787,9 +4806,9 @@ def get_empty_dtype_and_na(join_units): if dtype is None: continue - if com.is_categorical_dtype(dtype): + if is_categorical_dtype(dtype): upcast_cls = 'category' - elif com.is_datetimetz(dtype): + elif is_datetimetz(dtype): upcast_cls = 'datetimetz' elif issubclass(dtype.type, np.bool_): upcast_cls = 'bool' @@ -5062,8 +5081,8 @@ def dtype(self): if not self.needs_filling: return self.block.dtype else: - return com._get_dtype(com._maybe_promote(self.block.dtype, - self.block.fill_value)[0]) + return _get_dtype(_maybe_promote(self.block.dtype, + self.block.fill_value)[0]) return self._dtype diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 911fcaf529f98..b847415f274db 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -5,10 +5,15 @@ import numpy as np from distutils.version import LooseVersion -import pandas.core.common as com import pandas.algos as algos import pandas.lib as lib from pandas.compat import range, string_types +from pandas.types.common import (is_numeric_v_string_like, + is_float_dtype, is_datetime64_dtype, + is_integer_dtype, _ensure_float64, + is_scalar, + _DATELIKE_DTYPES) +from pandas.types.missing import isnull def mask_missing(arr, values_to_mask): @@ -24,7 +29,7 @@ def mask_missing(arr, 
values_to_mask): except Exception: values_to_mask = np.array(values_to_mask, dtype=object) - na_mask = com.isnull(values_to_mask) + na_mask = isnull(values_to_mask) nonna = values_to_mask[~na_mask] mask = None @@ -32,28 +37,28 @@ def mask_missing(arr, values_to_mask): if mask is None: # numpy elementwise comparison warning - if com.is_numeric_v_string_like(arr, x): + if is_numeric_v_string_like(arr, x): mask = False else: mask = arr == x # if x is a string and arr is not, then we get False and we must # expand the mask to size arr.shape - if lib.isscalar(mask): + if is_scalar(mask): mask = np.zeros(arr.shape, dtype=bool) else: # numpy elementwise comparison warning - if com.is_numeric_v_string_like(arr, x): + if is_numeric_v_string_like(arr, x): mask |= False else: mask |= arr == x if na_mask.any(): if mask is None: - mask = com.isnull(arr) + mask = isnull(arr) else: - mask |= com.isnull(arr) + mask |= isnull(arr) return mask @@ -110,7 +115,7 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, """ # Treat the original, non-scipy methods first. - invalid = com.isnull(yvalues) + invalid = isnull(yvalues) valid = ~invalid if not valid.any(): @@ -442,12 +447,12 @@ def pad_1d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None - if com.is_float_dtype(values): + if is_float_dtype(values): _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None) - elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): _method = _pad_1d_datetime - elif com.is_integer_dtype(values): - values = com._ensure_float64(values) + elif is_integer_dtype(values): + values = _ensure_float64(values) _method = algos.pad_inplace_float64 elif values.dtype == np.object_: _method = algos.pad_inplace_object @@ -456,7 +461,7 @@ def pad_1d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) if mask is None: - mask = com.isnull(values) + mask = isnull(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) return values @@ -467,12 +472,12 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None - if com.is_float_dtype(values): + if is_float_dtype(values): _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None) - elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): _method = _backfill_1d_datetime - elif com.is_integer_dtype(values): - values = com._ensure_float64(values) + elif is_integer_dtype(values): + values = _ensure_float64(values) _method = algos.backfill_inplace_float64 elif values.dtype == np.object_: _method = algos.backfill_inplace_object @@ -481,7 +486,7 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) if mask is None: - mask = com.isnull(values) + mask = isnull(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) @@ -493,12 +498,12 @@ def pad_2d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None - if com.is_float_dtype(values): + if is_float_dtype(values): _method = getattr(algos, 'pad_2d_inplace_%s' % dtype.name, None) - elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): _method = _pad_2d_datetime - elif 
com.is_integer_dtype(values): - values = com._ensure_float64(values) + elif is_integer_dtype(values): + values = _ensure_float64(values) _method = algos.pad_2d_inplace_float64 elif values.dtype == np.object_: _method = algos.pad_2d_inplace_object @@ -507,7 +512,7 @@ def pad_2d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) if mask is None: - mask = com.isnull(values) + mask = isnull(values) mask = mask.view(np.uint8) if np.all(values.shape): @@ -523,12 +528,12 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None - if com.is_float_dtype(values): + if is_float_dtype(values): _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None) - elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): _method = _backfill_2d_datetime - elif com.is_integer_dtype(values): - values = com._ensure_float64(values) + elif is_integer_dtype(values): + values = _ensure_float64(values) _method = algos.backfill_2d_inplace_float64 elif values.dtype == np.object_: _method = algos.backfill_2d_inplace_object @@ -537,7 +542,7 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) if mask is None: - mask = com.isnull(values) + mask = isnull(values) mask = mask.view(np.uint8) if np.all(values.shape): @@ -570,22 +575,22 @@ def fill_zeros(result, x, y, name, fill): mask the nan's from x """ - if fill is None or com.is_float_dtype(result): + if fill is None or is_float_dtype(result): return result if name.startswith(('r', '__r')): x, y = y, x - is_typed_variable = (hasattr(y, 'dtype') or hasattr(y, 'type')) - is_scalar = lib.isscalar(y) + is_variable_type = (hasattr(y, 'dtype') or hasattr(y, 'type')) + is_scalar_type = is_scalar(y) - if not is_typed_variable and not is_scalar: + if not is_variable_type and not is_scalar_type: return result - if is_scalar: + if is_scalar_type: y = np.array(y) - if com.is_integer_dtype(y): + if is_integer_dtype(y): if (y == 0).any(): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index f390e3f04a6c3..7b89373dda7ba 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -11,16 +11,19 @@ import pandas.hashtable as _hash from pandas import compat, lib, algos, tslib -from pandas.core.common import (isnull, notnull, _values_from_object, - _maybe_upcast_putmask, _ensure_float64, - _ensure_int64, _ensure_object, is_float, - is_integer, is_complex, is_float_dtype, - is_complex_dtype, is_integer_dtype, - is_bool_dtype, is_object_dtype, - is_datetime64_dtype, is_timedelta64_dtype, - is_datetime_or_timedelta_dtype, _get_dtype, - is_int_or_datetime_dtype, is_any_int_dtype, - _int64_max) +from pandas.types.common import (_ensure_int64, _ensure_object, + _ensure_float64, _get_dtype, + is_float, is_scalar, + is_integer, is_complex, is_float_dtype, + is_complex_dtype, is_integer_dtype, + is_bool_dtype, is_object_dtype, + is_datetime64_dtype, is_timedelta64_dtype, + is_datetime_or_timedelta_dtype, + is_int_or_datetime_dtype, is_any_int_dtype) +from pandas.types.cast import _int64_max, _maybe_upcast_putmask +from pandas.types.missing import isnull, notnull + +from pandas.core.common import _values_from_object class disallow(object): @@ -351,7 +354,7 @@ def _get_counts_nanvar(mask, axis, ddof, dtype=float): d = count - dtype.type(ddof) # always return NaN, never inf - if lib.isscalar(count): + if 
is_scalar(count): if count <= ddof: count = np.nan d = np.nan @@ -623,7 +626,7 @@ def _get_counts(mask, axis, dtype=float): return dtype.type(mask.size - mask.sum()) count = mask.shape[axis] - mask.sum(axis) - if lib.isscalar(count): + if is_scalar(count): return dtype.type(count) try: return count.astype(dtype) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 3aaca1eea486e..d76f011df3dd8 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -13,21 +13,25 @@ from pandas import compat, lib, tslib import pandas.index as _index from pandas.util.decorators import Appender -import pandas.core.common as com import pandas.computation.expressions as expressions from pandas.lib import isscalar from pandas.tslib import iNaT from pandas.compat import bind_method import pandas.core.missing as missing import pandas.algos as _algos -from pandas.core.common import (is_list_like, notnull, isnull, - _values_from_object, _maybe_match_name, - needs_i8_conversion, is_datetimelike_v_numeric, - is_integer_dtype, is_categorical_dtype, - is_object_dtype, is_timedelta64_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, - is_bool_dtype, PerformanceWarning, - ABCSeries, ABCIndex) +from pandas.core.common import (_values_from_object, _maybe_match_name, + PerformanceWarning) +from pandas.types.missing import notnull, isnull +from pandas.types.common import (needs_i8_conversion, + is_datetimelike_v_numeric, + is_integer_dtype, is_categorical_dtype, + is_object_dtype, is_timedelta64_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, + is_bool_dtype, is_datetimetz, + is_list_like, + _ensure_object) +from pandas.types.cast import _maybe_upcast_putmask +from pandas.types.generic import ABCSeries, ABCIndex # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -446,7 +450,7 @@ def _convert_to_array(self, values, name=None, other=None): supplied_dtype = values.dtype inferred_type = supplied_dtype or lib.infer_dtype(values) if (inferred_type in ('datetime64', 'datetime', 'date', 'time') or - com.is_datetimetz(inferred_type)): + is_datetimetz(inferred_type)): # if we have a other of timedelta, but use pd.NaT here we # we are in the wrong path if (supplied_dtype is None and other is not None and @@ -463,7 +467,7 @@ def _convert_to_array(self, values, name=None, other=None): hasattr(ovalues, 'tz')): values = pd.DatetimeIndex(values) # datetime array with tz - elif com.is_datetimetz(values): + elif is_datetimetz(values): if isinstance(values, ABCSeries): values = values._values elif not (isinstance(values, (np.ndarray, ABCSeries)) and @@ -625,7 +629,7 @@ def na_op(x, y): "{op}".format(typ=type(x).__name__, op=str_rep)) - result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result @@ -820,8 +824,8 @@ def na_op(x, y): if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? 
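            # [editor's note, not part of the patch] the branch above is the
            # fast path: numpy evaluates the op directly when both operands
            # are bool dtype; the else branch below falls back to coercing
            # both sides to object and letting lib.vec_binop apply the op
            # element-wise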
else: - x = com._ensure_object(x) - y = com._ensure_object(y) + x = _ensure_object(x) + y = _ensure_object(y) result = lib.vec_binop(x, y, op) else: try: @@ -1095,7 +1099,7 @@ def na_op(x, y): "objects of type {x} and {y}".format( op=name, x=type(x), y=type(y))) - result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) result = missing.fill_zeros(result, x, y, name, fill_zeros) @@ -1220,7 +1224,7 @@ def na_op(x, y): result = np.empty(len(x), dtype=x.dtype) mask = notnull(x) result[mask] = op(x[mask], y) - result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 7d0bedcc2b381..4d61563cccce5 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -8,17 +8,21 @@ import numpy as np +from pandas.types.cast import (_infer_dtype_from_scalar, + _possibly_cast_item) +from pandas.types.common import (is_integer, is_list_like, + is_string_like, is_scalar) +from pandas.types.missing import notnull + import pandas.computation.expressions as expressions import pandas.core.common as com import pandas.core.ops as ops import pandas.core.missing as missing from pandas import compat -from pandas import lib from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict) from pandas.compat.numpy import function as nv from pandas.core.categorical import Categorical -from pandas.core.common import (PandasError, _try_sort, _default_index, - _infer_dtype_from_scalar, is_list_like) +from pandas.core.common import PandasError, _try_sort, _default_index from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -168,7 +172,7 @@ def _init_data(self, data, copy, dtype, **kwargs): mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy) copy = False dtype = None - elif lib.isscalar(data) and all(x is not None for x in passed_axes): + elif is_scalar(data) and all(x is not None for x in passed_axes): if dtype is None: dtype, data = _infer_dtype_from_scalar(data) values = np.empty([len(x) for x in passed_axes], dtype=dtype) @@ -552,7 +556,7 @@ def set_value(self, *args, **kwargs): made_bigger = not np.array_equal(axes[0], self._info_axis) # how to make this logic simpler? 
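        # [editor's note, not part of the patch] the reindex above may have
        # enlarged the panel with NaN-filled slots; the cast below nudges the
        # target item back toward the dtype inferred from the incoming value
        # before set_value writes it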
if made_bigger: - com._possibly_cast_item(result, args[0], likely_dtype) + _possibly_cast_item(result, args[0], likely_dtype) return result.set_value(*args) @@ -582,7 +586,7 @@ def __setitem__(self, key, value): 'object was {1}'.format( shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) - elif lib.isscalar(value): + elif is_scalar(value): dtype, value = _infer_dtype_from_scalar(value) mat = np.empty(shape[1:], dtype=dtype) mat.fill(value) @@ -653,7 +657,7 @@ def round(self, decimals=0, *args, **kwargs): """ nv.validate_round(args, kwargs) - if com.is_integer(decimals): + if is_integer(decimals): result = np.apply_along_axis(np.round, 0, self.values) return self._wrap_result(result, axis=0) raise TypeError("decimals must be an integer") @@ -687,7 +691,7 @@ def dropna(self, axis=0, how='any', inplace=False): axis = self._get_axis_number(axis) values = self.values - mask = com.notnull(values) + mask = notnull(values) for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))): mask = mask.sum(ax) @@ -711,7 +715,7 @@ def _combine(self, other, func, axis=0): return self._combine_panel(other, func) elif isinstance(other, DataFrame): return self._combine_frame(other, func, axis=axis) - elif lib.isscalar(other): + elif is_scalar(other): return self._combine_const(other, func) else: raise NotImplementedError("%s is not supported in combine " @@ -924,7 +928,7 @@ def to_frame(self, filter_observations=True): if filter_observations: # shaped like the return DataFrame - mask = com.notnull(self.values).all(axis=0) + mask = notnull(self.values).all(axis=0) # size = mask.sum() selector = mask.ravel() else: @@ -1218,7 +1222,7 @@ def transpose(self, *args, **kwargs): # check if a list of axes was passed in instead as a # single *args element if (len(args) == 1 and hasattr(args[0], '__iter__') and - not com.is_string_like(args[0])): + not is_string_like(args[0])): axes = args[0] else: axes = args diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 8d237016d1b33..4f601a2d377a6 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -6,6 +6,11 @@ import numpy as np +from pandas.types.common import _ensure_platform_int, is_list_like +from pandas.types.cast import _maybe_promote +from pandas.types.missing import notnull +import pandas.types.concat as _concat + from pandas.core.series import Series from pandas.core.frame import DataFrame @@ -14,11 +19,8 @@ from pandas._sparse import IntIndex from pandas.core.categorical import Categorical -from pandas.core.common import notnull, _ensure_platform_int, _maybe_promote from pandas.core.groupby import get_group_index, _compress_group_index -import pandas.core.common as com -import pandas.types.concat as _concat import pandas.core.algorithms as algos import pandas.algos as _algos @@ -1063,7 +1065,7 @@ def check_len(item, name): length_msg = ("Length of '{0}' ({1}) did not match the length of " "the columns being encoded ({2}).") - if com.is_list_like(item): + if is_list_like(item): if not len(item) == len(columns_to_encode): raise ValueError(length_msg.format(name, len(item), len(columns_to_encode))) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8015670212181..2c7f298dde2ec 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,18 +13,33 @@ import numpy as np import numpy.ma as ma -from pandas.core.common import (isnull, notnull, is_bool_indexer, - _default_index, _maybe_upcast, - _asarray_tuplesafe, _infer_dtype_from_scalar, - is_list_like, _values_from_object, - is_categorical_dtype, 
- _possibly_cast_to_datetime, - _possibly_castable, _possibly_convert_platform, - _try_sort, is_extension_type, is_datetimetz, - _maybe_match_name, ABCSparseArray, - _coerce_to_dtype, SettingWithCopyError, - _maybe_box_datetimelike, ABCDataFrame, - _dict_compat, is_integer) +from pandas.types.common import (_coerce_to_dtype, is_categorical_dtype, + is_integer, is_integer_dtype, + is_float_dtype, + is_extension_type, is_datetimetz, + is_datetimelike, + is_timedelta64_dtype, + is_list_like, + is_hashable, + is_iterator, + is_dict_like, + is_scalar, + _ensure_platform_int) +from pandas.types.generic import ABCSparseArray, ABCDataFrame +from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar, + _possibly_convert_platform, + _possibly_cast_to_datetime, _possibly_castable) +from pandas.types.missing import isnull, notnull + +from pandas.core.common import (is_bool_indexer, + _default_index, + _asarray_tuplesafe, + _values_from_object, + _try_sort, + _maybe_match_name, + SettingWithCopyError, + _maybe_box_datetimelike, + _dict_compat) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices @@ -303,7 +318,7 @@ def name(self): @name.setter def name(self, value): - if value is not None and not com.is_hashable(value): + if value is not None and not is_hashable(value): raise TypeError('Series.name must be a hashable type') object.__setattr__(self, '_name', value) @@ -580,7 +595,7 @@ def __getitem__(self, key): try: result = self.index.get_value(self, key) - if not lib.isscalar(result): + if not is_scalar(result): if is_list_like(result) and not isinstance(result, Series): # we need to box if we have a non-unique index here @@ -613,10 +628,10 @@ def __getitem__(self, key): except Exception: raise - if com.is_iterator(key): + if is_iterator(key): key = list(key) - if is_bool_indexer(key): + if com.is_bool_indexer(key): key = check_bool_indexer(self.index, key) return self._get_with(key) @@ -710,9 +725,9 @@ def setitem(key, value): elif key is Ellipsis: self[:] = value return - elif is_bool_indexer(key): + elif com.is_bool_indexer(key): pass - elif com.is_timedelta64_dtype(self.dtype): + elif is_timedelta64_dtype(self.dtype): # reassign a null value to iNaT if isnull(value): value = tslib.iNaT @@ -736,7 +751,7 @@ def setitem(key, value): if 'unorderable' in str(e): # pragma: no cover raise IndexError(key) - if is_bool_indexer(key): + if com.is_bool_indexer(key): key = check_bool_indexer(self.index, key) try: self._where(~key, value, inplace=True) @@ -1060,7 +1075,7 @@ def _get_repr(self, name=False, header=True, index=True, length=True, def __iter__(self): """ provide iteration over the values of the Series box values if necessary """ - if com.is_datetimelike(self): + if is_datetimelike(self): return (_maybe_box_datetimelike(x) for x in self._values) else: return iter(self._values) @@ -1349,7 +1364,7 @@ def quantile(self, q=0.5, interpolation='linear'): result = self._data.quantile(qs=q, interpolation=interpolation) - if com.is_list_like(q): + if is_list_like(q): return self._constructor(result, index=Float64Index(q), name=self.name) @@ -1481,7 +1496,7 @@ def dot(self, other): @Appender(base._shared_docs['searchsorted']) def searchsorted(self, v, side='left', sorter=None): if sorter is not None: - sorter = com._ensure_platform_int(sorter) + sorter = _ensure_platform_int(sorter) return self._values.searchsorted(Series(v)._values, side=side, sorter=sorter) @@ -1727,7 +1742,7 
@@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, elif isinstance(index, MultiIndex): from pandas.core.groupby import _lexsort_indexer indexer = _lexsort_indexer(index.labels, orders=ascending) - indexer = com._ensure_platform_int(indexer) + indexer = _ensure_platform_int(indexer) new_index = index.take(indexer) else: new_index, indexer = index.sort_values(return_indexer=True, @@ -2265,8 +2280,8 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, @Appender(generic._shared_docs['rename'] % _shared_doc_kwargs) def rename(self, index=None, **kwargs): - non_mapping = lib.isscalar(index) or (com.is_list_like(index) and - not com.is_dict_like(index)) + non_mapping = is_scalar(index) or (is_list_like(index) and + not is_dict_like(index)) if non_mapping: return self._set_name(index, inplace=kwargs.get('inplace')) return super(Series, self).rename(index=index, **kwargs) @@ -2345,7 +2360,7 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): if convert: indices = maybe_convert_indices(indices, len(self._get_axis(axis))) - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) new_index = self.index.take(indices) new_values = self._values.take(indices) return self._constructor(new_values, @@ -2771,7 +2786,7 @@ def _try_cast(arr, take_fast_path): subarr = np.array(data, copy=False) # possibility of nan -> garbage - if com.is_float_dtype(data.dtype) and com.is_integer_dtype(dtype): + if is_float_dtype(data.dtype) and is_integer_dtype(dtype): if not isnull(data).any(): subarr = _try_cast(data, True) elif copy: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index a3f687b7fd73c..6ec28f9735850 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,14 +1,19 @@ import numpy as np from pandas.compat import zip -from pandas.core.common import (isnull, notnull, _values_from_object, - is_bool_dtype, - is_list_like, is_categorical_dtype, - is_object_dtype, is_string_like) +from pandas.types.generic import ABCSeries, ABCIndex +from pandas.types.missing import isnull, notnull +from pandas.types.common import (is_bool_dtype, + is_categorical_dtype, + is_object_dtype, + is_string_like, + is_list_like, + is_scalar) +from pandas.core.common import _values_from_object + from pandas.core.algorithms import take_1d import pandas.compat as compat from pandas.core.base import AccessorProperty, NoNewAttributesMixin -from pandas.types import api as gt from pandas.util.decorators import Appender, deprecate_kwarg import re import pandas.lib as lib @@ -152,7 +157,7 @@ def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object): if not len(arr): return np.ndarray(0, dtype=dtype) - if isinstance(arr, gt.ABCSeries): + if isinstance(arr, ABCSeries): arr = arr.values if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) @@ -343,7 +348,7 @@ def str_repeat(arr, repeats): ------- repeated : Series/Index of objects """ - if lib.isscalar(repeats): + if is_scalar(repeats): def rep(x): try: @@ -696,7 +701,7 @@ def str_extractall(arr, pat, flags=0): if regex.groups == 0: raise ValueError("pattern contains no capture groups") - if isinstance(arr, gt.ABCIndex): + if isinstance(arr, ABCIndex): arr = arr.to_series().reset_index(drop=True) names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) @@ -1538,7 +1543,7 @@ def rjust(self, width, fillchar=' '): return self.pad(width, side='left', fillchar=fillchar) def zfill(self, width): - """" + """ Filling left side of strings in the 
Series/Index with 0. Equivalent to :meth:`str.zfill`. @@ -1820,7 +1825,7 @@ class StringAccessorMixin(object): def _make_str_accessor(self): from pandas.core.index import Index - if (isinstance(self, gt.ABCSeries) and + if (isinstance(self, ABCSeries) and not ((is_categorical_dtype(self.dtype) and is_object_dtype(self.values.categories)) or (is_object_dtype(self.dtype)))): diff --git a/pandas/core/window.py b/pandas/core/window.py index 1e34d18fe3e54..bc4d34529287b 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -11,6 +11,15 @@ import numpy as np from collections import defaultdict +from pandas.types.generic import ABCSeries, ABCDataFrame +from pandas.types.common import (is_integer, + is_bool, + is_float_dtype, + is_integer_dtype, + needs_i8_conversion, + is_timedelta64_dtype, + is_list_like, + _ensure_float64) import pandas as pd from pandas.lib import isscalar from pandas.core.base import (PandasObject, SelectionMixin, @@ -64,10 +73,10 @@ def _constructor(self): return Window def validate(self): - if self.center is not None and not com.is_bool(self.center): + if self.center is not None and not is_bool(self.center): raise ValueError("center must be a boolean") if self.min_periods is not None and not \ - com.is_integer(self.min_periods): + is_integer(self.min_periods): raise ValueError("min_periods must be an integer") def _convert_freq(self, how=None): @@ -75,7 +84,7 @@ def _convert_freq(self, how=None): obj = self._selected_obj if (self.freq is not None and - isinstance(obj, (com.ABCSeries, com.ABCDataFrame))): + isinstance(obj, (ABCSeries, ABCDataFrame))): if how is not None: warnings.warn("The how kw argument is deprecated and removed " "in a future version. You can resample prior " @@ -111,7 +120,7 @@ def _gotitem(self, key, ndim, subset=None): self = self._shallow_copy(subset) self._reset_cache() if subset.ndim == 2: - if isscalar(key) and key in subset or com.is_list_like(key): + if isscalar(key) and key in subset or is_list_like(key): self._selection = key return self @@ -150,11 +159,11 @@ def _prep_values(self, values=None, kill_inf=True, how=None): # GH #12373 : rolling functions error on float32 data # make sure the data is coerced to float64 - if com.is_float_dtype(values.dtype): - values = com._ensure_float64(values) - elif com.is_integer_dtype(values.dtype): - values = com._ensure_float64(values) - elif com.needs_i8_conversion(values.dtype): + if is_float_dtype(values.dtype): + values = _ensure_float64(values) + elif is_integer_dtype(values.dtype): + values = _ensure_float64(values) + elif needs_i8_conversion(values.dtype): raise NotImplementedError("ops for {action} for this " "dtype {dtype} are not " "implemented".format( @@ -162,7 +171,7 @@ def _prep_values(self, values=None, kill_inf=True, how=None): dtype=values.dtype)) else: try: - values = com._ensure_float64(values) + values = _ensure_float64(values) except (ValueError, TypeError): raise TypeError("cannot handle this type -> {0}" "".format(values.dtype)) @@ -184,7 +193,7 @@ def _wrap_result(self, result, block=None, obj=None): # coerce if necessary if block is not None: - if com.is_timedelta64_dtype(block.values.dtype): + if is_timedelta64_dtype(block.values.dtype): result = pd.to_timedelta( result.ravel(), unit='ns').values.reshape(result.shape) @@ -345,7 +354,7 @@ def _prep_window(self, **kwargs): window = self._get_window() if isinstance(window, (list, tuple, np.ndarray)): return com._asarray_tuplesafe(window).astype(float) - elif com.is_integer(window): + elif is_integer(window): import 
scipy.signal as sig # the below may pop from kwargs @@ -543,7 +552,7 @@ def _apply(self, func, name=None, window=None, center=None, def func(arg, window, min_periods=None): minp = check_minp(min_periods, window) # GH #12373: rolling functions error on float32 data - return cfunc(com._ensure_float64(arg), + return cfunc(_ensure_float64(arg), window, minp, **kwargs) # calculation function @@ -586,7 +595,7 @@ def count(self): results = [] for b in blocks: - if com.needs_i8_conversion(b.values): + if needs_i8_conversion(b.values): result = b.notnull().astype(int) else: try: @@ -850,7 +859,7 @@ class Rolling(_Rolling_and_Expanding): def validate(self): super(Rolling, self).validate() - if not com.is_integer(self.window): + if not is_integer(self.window): raise ValueError("window must be an integer") elif self.window < 0: raise ValueError("window must be non-negative") @@ -1484,7 +1493,7 @@ def _get_center_of_mass(com, span, halflife, alpha): def _offset(window, center): - if not com.is_integer(window): + if not is_integer(window): window = len(window) offset = (window - 1) / 2. if center else 0 try: diff --git a/pandas/formats/format.py b/pandas/formats/format.py index cc46ed57aeff0..436a9d5d5d4c8 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -10,8 +10,19 @@ import sys +from pandas.types.missing import isnull, notnull +from pandas.types.common import (is_categorical_dtype, + is_float_dtype, + is_period_arraylike, + is_integer_dtype, + is_datetimetz, + is_integer, + is_float, + is_numeric_dtype, + is_datetime64_dtype, + is_timedelta64_dtype) + from pandas.core.base import PandasObject -from pandas.core.common import isnull, notnull, is_numeric_dtype from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat from pandas.compat import (StringIO, lzip, range, map, zip, reduce, u, @@ -194,7 +205,7 @@ def _get_footer(self): # level infos are added to the end and in a new line, like it is done # for Categoricals - if com.is_categorical_dtype(self.tr_series.dtype): + if is_categorical_dtype(self.tr_series.dtype): level_info = self.tr_series._values._repr_categories_info() if footer: footer += "\n" @@ -316,12 +327,12 @@ def should_show_dimensions(self): def _get_formatter(self, i): if isinstance(self.formatters, (list, tuple)): - if com.is_integer(i): + if is_integer(i): return self.formatters[i] else: return None else: - if com.is_integer(i) and i not in self.columns: + if is_integer(i) and i not in self.columns: i = self.columns[i] return self.formatters.get(i, None) @@ -1646,7 +1657,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None, def _format_value(self, val): if lib.checknull(val): val = self.na_rep - elif com.is_float(val): + elif is_float(val): if lib.isposinf_scalar(val): val = self.inf_rep elif lib.isneginf_scalar(val): @@ -1867,19 +1878,19 @@ def get_formatted_cells(self): def format_array(values, formatter, float_format=None, na_rep='NaN', digits=None, space=None, justify='right', decimal='.'): - if com.is_categorical_dtype(values): + if is_categorical_dtype(values): fmt_klass = CategoricalArrayFormatter - elif com.is_float_dtype(values.dtype): + elif is_float_dtype(values.dtype): fmt_klass = FloatArrayFormatter - elif com.is_period_arraylike(values): + elif is_period_arraylike(values): fmt_klass = PeriodArrayFormatter - elif com.is_integer_dtype(values.dtype): + elif is_integer_dtype(values.dtype): fmt_klass = IntArrayFormatter - elif com.is_datetimetz(values): + elif is_datetimetz(values): fmt_klass = 
Datetime64TZFormatter - elif com.is_datetime64_dtype(values.dtype): + elif is_datetime64_dtype(values.dtype): fmt_klass = Datetime64Formatter - elif com.is_timedelta64_dtype(values.dtype): + elif is_timedelta64_dtype(values.dtype): fmt_klass = Timedelta64Formatter else: fmt_klass = GenericArrayFormatter @@ -1949,14 +1960,14 @@ def _format(x): if isinstance(vals, Index): vals = vals._values - is_float = lib.map_infer(vals, com.is_float) & notnull(vals) - leading_space = is_float.any() + is_float_type = lib.map_infer(vals, is_float) & notnull(vals) + leading_space = is_float_type.any() fmt_values = [] for i, v in enumerate(vals): - if not is_float[i] and leading_space: + if not is_float_type[i] and leading_space: fmt_values.append(' %s' % _format(v)) - elif is_float[i]: + elif is_float_type[i]: fmt_values.append(float_format(v)) else: fmt_values.append(' %s' % _format(v)) diff --git a/pandas/formats/printing.py b/pandas/formats/printing.py index a4eaec8d5334b..37bd4b63d6f7a 100644 --- a/pandas/formats/printing.py +++ b/pandas/formats/printing.py @@ -2,9 +2,9 @@ printing tools """ +from pandas.types.inference import is_sequence from pandas import compat from pandas.compat import u -import pandas.core.common as com from pandas.core.config import get_option @@ -213,7 +213,7 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items) - elif (com.is_sequence(thing) and + elif (is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, diff --git a/pandas/formats/style.py b/pandas/formats/style.py index 477ecccc03f4f..472fd958d35eb 100644 --- a/pandas/formats/style.py +++ b/pandas/formats/style.py @@ -17,10 +17,11 @@ "or `pip install Jinja2`" raise ImportError(msg) +from pandas.types.common import is_float, is_string_like + import numpy as np import pandas as pd from pandas.compat import lzip, range -import pandas.core.common as com from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice try: import matplotlib.pyplot as plt @@ -153,7 +154,7 @@ def __init__(self, data, precision=None, table_styles=None, uuid=None, # display_funcs maps (row, col) -> formatting function def default_display_func(x): - if com.is_float(x): + if is_float(x): return '{:>.{precision}g}'.format(x, precision=self.precision) else: return x @@ -893,7 +894,7 @@ def _highlight_extrema(data, color='yellow', max_=True): def _maybe_wrap_formatter(formatter): - if com.is_string_like(formatter): + if is_string_like(formatter): return lambda x: formatter.format(x) elif callable(formatter): return formatter diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 0bb80be013275..5c9938c932da2 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -12,6 +12,28 @@ from pandas.compat import range, u from pandas.compat.numpy import function as nv from pandas import compat + + +from pandas.types.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex +from pandas.types.missing import isnull, array_equivalent +from pandas.types.common import (_ensure_int64, _ensure_object, + _ensure_platform_int, + is_datetimetz, + is_integer, + is_float, + is_dtype_equal, + is_object_dtype, + is_categorical_dtype, + is_bool_dtype, + is_integer_dtype, is_float_dtype, + needs_i8_conversion, + is_iterator, is_list_like, + is_scalar) +from pandas.types.cast import 
_coerce_indexer_dtype +from pandas.core.common import (is_bool_indexer, + _values_from_object, + _asarray_tuplesafe) + from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin) import pandas.core.base as base @@ -22,15 +44,6 @@ import pandas.core.missing as missing import pandas.core.algorithms as algos from pandas.formats.printing import pprint_thing -from pandas.core.common import (isnull, array_equivalent, - is_object_dtype, is_datetimetz, ABCSeries, - ABCPeriodIndex, ABCMultiIndex, - _values_from_object, is_float, is_integer, - is_iterator, is_categorical_dtype, - _ensure_object, _ensure_int64, is_bool_indexer, - is_list_like, is_bool_dtype, - is_integer_dtype, is_float_dtype, - needs_i8_conversion) from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.strings import StringAccessorMixin @@ -223,7 +236,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): subarr = data.astype('object') else: - subarr = com._asarray_tuplesafe(data, dtype=object) + subarr = _asarray_tuplesafe(data, dtype=object) # _asarray_tuplesafe does not always copy underlying data, # so need to make sure that this happens @@ -264,7 +277,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif hasattr(data, '__array__'): return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) - elif data is None or lib.isscalar(data): + elif data is None or is_scalar(data): cls._scalar_data_error(data) else: if (tupleize_cols and isinstance(data, list) and data and @@ -284,7 +297,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # python2 - MultiIndex fails on mixed types pass # other iterable of some kind - subarr = com._asarray_tuplesafe(data, dtype=object) + subarr = _asarray_tuplesafe(data, dtype=object) return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) """ @@ -539,7 +552,7 @@ def _coerce_to_ndarray(cls, data): """ if not isinstance(data, (np.ndarray, Index)): - if data is None or lib.isscalar(data): + if data is None or is_scalar(data): cls._scalar_data_error(data) # other iterable of some kind @@ -841,7 +854,7 @@ def to_datetime(self, dayfirst=False): return DatetimeIndex(self.values) def _assert_can_do_setop(self, other): - if not com.is_list_like(other): + if not is_list_like(other): raise TypeError('Input must be Index or array-like') return True @@ -1325,7 +1338,7 @@ def __getitem__(self, key): getitem = self._data.__getitem__ promote = self._shallow_copy - if lib.isscalar(key): + if is_scalar(key): return getitem(key) if isinstance(key, slice): @@ -1338,7 +1351,7 @@ def __getitem__(self, key): key = _values_from_object(key) result = getitem(key) - if not lib.isscalar(result): + if not is_scalar(result): return promote(result) else: return result @@ -1426,7 +1439,7 @@ def _ensure_compat_concat(indexes): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) if self._can_hold_na: taken = self._assert_take_fillable(self.values, indices, allow_fill=allow_fill, @@ -1442,7 +1455,7 @@ def take(self, indices, axis=0, allow_fill=True, def _assert_take_fillable(self, values, indices, allow_fill=True, fill_value=None, na_value=np.nan): """ Internal method to handle NA filling of take """ - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) # only fill if we are 
passing a non-None fill_value if allow_fill and fill_value is not None: @@ -1491,7 +1504,7 @@ def _convert_for_op(self, value): def _assert_can_do_op(self, value): """ Check value is valid for scalar op """ - if not lib.isscalar(value): + if not is_scalar(value): msg = "'value' must be a scalar, passed: {0}" raise TypeError(msg.format(type(value).__name__)) @@ -1706,7 +1719,7 @@ def argsort(self, *args, **kwargs): return result.argsort(*args, **kwargs) def __add__(self, other): - if com.is_list_like(other): + if is_list_like(other): warnings.warn("using '+' to provide set union with Indexes is " "deprecated, use '|' or .union()", FutureWarning, stacklevel=2) @@ -1783,7 +1796,7 @@ def union(self, other): if len(self) == 0: return other._get_consensus_name(self) - if not com.is_dtype_equal(self.dtype, other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.union(other) @@ -1866,7 +1879,7 @@ def intersection(self, other): if self.equals(other): return self._get_consensus_name(other) - if not com.is_dtype_equal(self.dtype, other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.intersection(other) @@ -2028,7 +2041,7 @@ def get_value(self, series, key): # if we have something that is Index-like, then # use this, e.g. DatetimeIndex s = getattr(series, '_values', None) - if isinstance(s, Index) and lib.isscalar(key): + if isinstance(s, Index) and is_scalar(key): try: return s[key] except (IndexError, ValueError): @@ -2061,7 +2074,7 @@ def get_value(self, series, key): raise e1 except TypeError: # python 3 - if lib.isscalar(key): # pragma: no cover + if is_scalar(key): # pragma: no cover raise IndexError(key) raise InvalidIndexError(key) @@ -2137,7 +2150,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return pself.get_indexer(ptarget, method=method, limit=limit, tolerance=tolerance) - if not com.is_dtype_equal(self.dtype, target.dtype): + if not is_dtype_equal(self.dtype, target.dtype): this = self.astype(object) target = target.astype(object) return this.get_indexer(target, method=method, limit=limit, @@ -2161,7 +2174,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): indexer = self._engine.get_indexer(target._values) - return com._ensure_platform_int(indexer) + return _ensure_platform_int(indexer) def _convert_tolerance(self, tolerance): # override this method on subclasses @@ -2443,7 +2456,7 @@ def _reindex_non_unique(self, target): if len(missing): l = np.arange(len(indexer)) - missing = com._ensure_platform_int(missing) + missing = _ensure_platform_int(missing) missing_labels = target.take(missing) missing_indexer = _ensure_int64(l[~check]) cur_labels = self.take(indexer[check])._values @@ -2541,7 +2554,7 @@ def join(self, other, how='left', level=None, return_indexers=False): result = x, z, y return result - if not com.is_dtype_equal(self.dtype, other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.join(other, how=how, return_indexers=return_indexers) @@ -2637,8 +2650,8 @@ def _join_non_unique(self, other, how='left', return_indexers=False): [other._values], how=how, sort=True) - left_idx = com._ensure_platform_int(left_idx) - right_idx = com._ensure_platform_int(right_idx) + left_idx = _ensure_platform_int(left_idx) + right_idx = _ensure_platform_int(right_idx) join_index = self.values.take(left_idx) mask = left_idx == -1 @@ 
-2850,9 +2863,9 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): kind=kind) # return a slice - if not lib.isscalar(start_slice): + if not is_scalar(start_slice): raise AssertionError("Start slice bound is non-scalar") - if not lib.isscalar(end_slice): + if not is_scalar(end_slice): raise AssertionError("End slice bound is non-scalar") return slice(start_slice, end_slice, step) @@ -3483,7 +3496,7 @@ def _get_na_value(dtype): def _ensure_frozen(array_like, categories, copy=False): - array_like = com._coerce_indexer_dtype(array_like, categories) + array_like = _coerce_indexer_dtype(array_like, categories) array_like = array_like.view(FrozenNDArray) if copy: array_like = array_like.copy() diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 84b8926f4177f..f1d4fe2f26bdd 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -1,15 +1,21 @@ import numpy as np -import pandas.lib as lib import pandas.index as _index from pandas import compat from pandas.compat.numpy import function as nv +from pandas.types.generic import ABCCategorical, ABCSeries +from pandas.types.common import (is_categorical_dtype, + _ensure_platform_int, + is_list_like, + is_scalar) +from pandas.types.missing import array_equivalent + + from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg) from pandas.core.config import get_option from pandas.indexes.base import Index, _index_shared_docs import pandas.core.base as base -import pandas.core.common as com import pandas.core.missing as missing import pandas.indexes.base as ibase @@ -49,7 +55,7 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, if name is None and hasattr(data, 'name'): name = data.name - if isinstance(data, com.ABCCategorical): + if isinstance(data, ABCCategorical): data = cls._create_categorical(cls, data, categories, ordered) elif isinstance(data, CategoricalIndex): data = data._data @@ -58,7 +64,7 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, # don't allow scalars # if data is None, then categories must be provided - if lib.isscalar(data): + if is_scalar(data): if data is not None or categories is None: cls._scalar_data_error(data) data = [] @@ -116,7 +122,7 @@ def _create_categorical(self, data, categories=None, ordered=None): ------- Categorical """ - if not isinstance(data, com.ABCCategorical): + if not isinstance(data, ABCCategorical): from pandas.core.categorical import Categorical data = Categorical(data, categories=categories, ordered=ordered) else: @@ -164,7 +170,7 @@ def _is_dtype_compat(self, other): ------ TypeError if the dtypes are not compatible """ - if com.is_categorical_dtype(other): + if is_categorical_dtype(other): if isinstance(other, CategoricalIndex): other = other._values if not other.is_dtype_equal(self): @@ -172,7 +178,7 @@ def _is_dtype_compat(self, other): "when appending") else: values = other - if not com.is_list_like(values): + if not is_list_like(values): values = [values] other = CategoricalIndex(self._create_categorical( self, other, categories=self.categories, ordered=self.ordered)) @@ -191,7 +197,7 @@ def equals(self, other): try: other = self._is_dtype_compat(other) - return com.array_equivalent(self._data, other) + return array_equivalent(self._data, other) except (TypeError, ValueError): pass @@ -360,7 +366,7 @@ def reindex(self, target, method=None, level=None, limit=None, target = ibase._ensure_index(target) - if not com.is_categorical_dtype(target) and not target.is_unique: + if not 
is_categorical_dtype(target) and not target.is_unique: raise ValueError("cannot reindex with a non-unique indexer") indexer, missing = self.get_indexer_non_unique(np.array(target)) @@ -388,7 +394,7 @@ def reindex(self, target, method=None, level=None, limit=None, # unless we had an inital Categorical to begin with # in which case we are going to conform to the passed Categorical new_target = np.asarray(new_target) - if com.is_categorical_dtype(target): + if is_categorical_dtype(target): new_target = target._shallow_copy(new_target, name=self.name) else: new_target = Index(new_target, name=self.name) @@ -460,7 +466,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): codes = self.categories.get_indexer(target) indexer, _ = self._engine.get_indexer_non_unique(codes) - return com._ensure_platform_int(indexer) + return _ensure_platform_int(indexer) def get_indexer_non_unique(self, target): """ this is the same for a CategoricalIndex for get_indexer; the API @@ -491,7 +497,7 @@ def _convert_list_indexer(self, keyarr, kind=None): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) taken = self._assert_take_fillable(self.codes, indices, allow_fill=allow_fill, fill_value=fill_value, @@ -591,12 +597,12 @@ def _evaluate_compare(self, other): self, other._values, categories=self.categories, ordered=self.ordered) - if isinstance(other, (com.ABCCategorical, np.ndarray, - com.ABCSeries)): + if isinstance(other, (ABCCategorical, np.ndarray, + ABCSeries)): if len(self.values) != len(other): raise ValueError("Lengths must match to compare") - if isinstance(other, com.ABCCategorical): + if isinstance(other, ABCCategorical): if not self.values.is_dtype_equal(other): raise TypeError("categorical index comparisions must " "have the same categories and ordered " @@ -619,7 +625,7 @@ def _delegate_method(self, name, *args, **kwargs): if 'inplace' in kwargs: raise ValueError("cannot use inplace with CategoricalIndex") res = method(*args, **kwargs) - if lib.isscalar(res): + if is_scalar(res): return res return CategoricalIndex(res, name=self.name) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 05b2045a4850f..365a971f82a3b 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -13,6 +13,21 @@ from pandas.compat import range, zip, lrange, lzip, map from pandas.compat.numpy import function as nv from pandas import compat + + +from pandas.types.common import (_ensure_int64, + _ensure_platform_int, + is_object_dtype, + is_iterator, + is_list_like, + is_scalar) +from pandas.types.missing import isnull, array_equivalent +from pandas.core.common import (_values_from_object, + is_bool_indexer, + is_null_slice, + PerformanceWarning) + + from pandas.core.base import FrozenList import pandas.core.base as base from pandas.util.decorators import (Appender, cache_readonly, @@ -21,13 +36,6 @@ import pandas.core.missing as missing import pandas.core.algorithms as algos from pandas.formats.printing import pprint_thing -from pandas.core.common import (isnull, array_equivalent, - is_object_dtype, - _values_from_object, - is_iterator, - _ensure_int64, is_bool_indexer, - is_list_like, is_null_slice, - PerformanceWarning) from pandas.core.config import get_option @@ -798,7 +806,7 @@ def lexsort_depth(self): else: return 0 - int64_labels = [com._ensure_int64(lab) for lab in self.labels] + int64_labels = [_ensure_int64(lab) for lab in 
self.labels] for k in range(self.nlevels, 0, -1): if lib.is_lexsorted(int64_labels[:k]): return k @@ -984,7 +992,7 @@ def __setstate__(self, state): self._reset_identity() def __getitem__(self, key): - if lib.isscalar(key): + if is_scalar(key): retval = [] for lev, lab in zip(self.levels, self.labels): if lab[key] == -1: @@ -1011,7 +1019,7 @@ def __getitem__(self, key): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) taken = self._assert_take_fillable(self.labels, indices, allow_fill=allow_fill, fill_value=fill_value, @@ -1313,7 +1321,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): if not ascending: indexer = indexer[::-1] - indexer = com._ensure_platform_int(indexer) + indexer = _ensure_platform_int(indexer) new_labels = [lab.take(indexer) for lab in self.labels] new_index = MultiIndex(labels=new_labels, levels=self.levels, @@ -1377,7 +1385,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): else: indexer = self_index._engine.get_indexer(target._values) - return com._ensure_platform_int(indexer) + return _ensure_platform_int(indexer) def reindex(self, target, method=None, level=None, limit=None, tolerance=None): @@ -1759,7 +1767,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): # selected from pandas import Series mapper = Series(indexer) - indexer = labels.take(com._ensure_platform_int(indexer)) + indexer = labels.take(_ensure_platform_int(indexer)) result = Series(Index(indexer).isin(r).nonzero()[0]) m = result.map(mapper)._values diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 89fc05fdcc5f5..86d22e141f781 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -3,13 +3,15 @@ import pandas.algos as _algos import pandas.index as _index +from pandas.types.common import (is_dtype_equal, pandas_dtype, + is_float_dtype, is_object_dtype, + is_integer_dtype, is_scalar) +from pandas.types.missing import array_equivalent, isnull +from pandas.core.common import _values_from_object + from pandas import compat from pandas.indexes.base import Index, InvalidIndexError, _index_shared_docs from pandas.util.decorators import Appender, cache_readonly -import pandas.core.common as com -from pandas.core.common import (is_dtype_equal, isnull, pandas_dtype, - is_float_dtype, is_object_dtype, - is_integer_dtype) import pandas.indexes.base as ibase @@ -164,8 +166,8 @@ def equals(self, other): if self.is_(other): return True - return com.array_equivalent(com._values_from_object(self), - com._values_from_object(other)) + return array_equivalent(_values_from_object(self), + _values_from_object(other)) def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None @@ -287,17 +289,17 @@ def _format_native_types(self, na_rep='', float_format=None, decimal='.', def get_value(self, series, key): """ we always want to get an index value, never a value """ - if not lib.isscalar(key): + if not is_scalar(key): raise InvalidIndexError from pandas.core.indexing import maybe_droplevels from pandas.core.series import Series - k = com._values_from_object(key) + k = _values_from_object(key) loc = self.get_loc(k) - new_values = com._values_from_object(series)[loc] + new_values = _values_from_object(series)[loc] - if lib.isscalar(new_values) or new_values is None: + if is_scalar(new_values) or new_values is None: return 
new_values new_index = self[loc] diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 168143fdea047..f680d2da0161e 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -4,14 +4,16 @@ import numpy as np import pandas.index as _index +from pandas.types.common import (is_integer, + is_scalar, + is_int64_dtype) + from pandas import compat from pandas.compat import lrange, range from pandas.compat.numpy import function as nv from pandas.indexes.base import Index, _index_shared_docs from pandas.util.decorators import Appender, cache_readonly -import pandas.core.common as com import pandas.indexes.base as ibase -import pandas.lib as lib from pandas.indexes.numeric import Int64Index @@ -120,7 +122,7 @@ def _simple_new(cls, start, stop=None, step=None, name=None, result = object.__new__(cls) # handle passed None, non-integers - if start is None or not com.is_integer(start): + if start is None or not is_integer(start): try: return RangeIndex(start, stop, step, name=name, **kwargs) except TypeError: @@ -139,7 +141,7 @@ def _simple_new(cls, start, stop=None, step=None, name=None, @staticmethod def _validate_dtype(dtype): """ require dtype to be None or int64 """ - if not (dtype is None or com.is_int64_dtype(dtype)): + if not (dtype is None or is_int64_dtype(dtype)): raise TypeError('Invalid to pass a non-int64 dtype to RangeIndex') @cache_readonly @@ -448,7 +450,7 @@ def __getitem__(self, key): """ super_getitem = super(RangeIndex, self).__getitem__ - if lib.isscalar(key): + if is_scalar(key): n = int(key) if n != key: return super_getitem(key) @@ -510,7 +512,7 @@ def __getitem__(self, key): return super_getitem(key) def __floordiv__(self, other): - if com.is_integer(other): + if is_integer(other): if (len(self) == 0 or self._start % other == 0 and self._step % other == 0): @@ -560,7 +562,7 @@ def _evaluate_numeric_binop(self, other): # we don't have a representable op # so return a base index - if not com.is_integer(rstep) or not rstep: + if not is_integer(rstep) or not rstep: raise ValueError else: @@ -577,7 +579,7 @@ def _evaluate_numeric_binop(self, other): # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return # as a Float64Index if we have float-like descriptors - if not all([com.is_integer(x) for x in + if not all([is_integer(x) for x in [rstart, rstop, rstep]]): result = result.astype('float64') diff --git a/pandas/io/common.py b/pandas/io/common.py index 76395928eb011..6f9bddd0fdf9b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -11,8 +11,8 @@ from pandas.compat import StringIO, BytesIO, string_types, text_type from pandas import compat from pandas.formats.printing import pprint_thing -from pandas.core.common import is_number, AbstractMethodError - +from pandas.core.common import AbstractMethodError +from pandas.types.common import is_number try: import pathlib diff --git a/pandas/io/data.py b/pandas/io/data.py index 5fa440e7bb1ff..68151fbb091fa 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -19,7 +19,9 @@ ) import pandas.compat as compat from pandas import Panel, DataFrame, Series, read_csv, concat, to_datetime, DatetimeIndex, DateOffset -from pandas.core.common import is_list_like, PandasError + +from pandas.types.common import is_list_like +from pandas.core.common import PandasError from pandas.io.common import urlopen, ZipFile, urlencode from pandas.tseries.offsets import MonthEnd from pandas.util.testing import _network_error_classes diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 
775465ea9372d..703cdbeaa7a8f 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -10,6 +10,9 @@ import abc import numpy as np +from pandas.types.common import (is_integer, is_float, + is_bool, is_list_like) + from pandas.core.frame import DataFrame from pandas.io.parsers import TextParser from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, @@ -22,7 +25,6 @@ from pandas.formats.printing import pprint_thing import pandas.compat as compat import pandas.compat.openpyxl_compat as openpyxl_compat -import pandas.core.common as com from warnings import warn from distutils.version import LooseVersion @@ -423,17 +425,17 @@ def _parse_cell(cell_contents, cell_typ): output[asheetname] = DataFrame() continue - if com.is_list_like(header) and len(header) == 1: + if is_list_like(header) and len(header) == 1: header = header[0] # forward fill and pull out names for MultiIndex column header_names = None if header is not None: - if com.is_list_like(header): + if is_list_like(header): header_names = [] control_row = [True for x in data[0]] for row in header: - if com.is_integer(skiprows): + if is_integer(skiprows): row += skiprows data[row], control_row = _fill_mi_header( @@ -444,9 +446,9 @@ def _parse_cell(cell_contents, cell_typ): else: data[header] = _trim_excel_header(data[header]) - if com.is_list_like(index_col): + if is_list_like(index_col): # forward fill values for MultiIndex index - if not com.is_list_like(header): + if not is_list_like(header): offset = 1 + header else: offset = 1 + max(header) @@ -459,7 +461,7 @@ def _parse_cell(cell_contents, cell_typ): else: last = data[row][col] - if com.is_list_like(header) and len(header) > 1: + if is_list_like(header) and len(header) > 1: has_index_names = True # GH 12292 : error when read one empty column from excel file @@ -556,21 +558,21 @@ def _pop_header_name(row, index_col): return none_fill(row[0]), row[1:] else: # pop out header name and fill w/ blank - i = index_col if not com.is_list_like(index_col) else max(index_col) + i = index_col if not is_list_like(index_col) else max(index_col) return none_fill(row[i]), row[:i] + [''] + row[i + 1:] def _conv_value(val): # Convert numpy types to Python types for the Excel writers. 
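An aside on the hunk below: ``_conv_value`` leans on the relocated introspection helpers precisely because they accept numpy scalars as well as built-in Python ones. A minimal sketch (illustrative only, not part of the patch)::

    from pandas.types.common import is_integer, is_float, is_bool
    import numpy as np

    assert is_integer(np.int64(5)) and is_integer(5)
    assert is_float(np.float64(1.5)) and is_float(1.5)
    assert is_bool(np.bool_(True)) and is_bool(True)
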
- if com.is_integer(val): + if is_integer(val): val = int(val) - elif com.is_float(val): + elif is_float(val): val = float(val) - elif com.is_bool(val): + elif is_bool(val): val = bool(val) elif isinstance(val, Period): val = "%s" % val - elif com.is_list_like(val): + elif is_list_like(val): val = str(val) return val diff --git a/pandas/io/html.py b/pandas/io/html.py index 609642e248eda..e0d84a9617ae4 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -12,12 +12,12 @@ import numpy as np +from pandas.types.common import is_list_like from pandas.io.common import (EmptyDataError, _is_url, urlopen, parse_url, _validate_header_arg) from pandas.io.parsers import TextParser from pandas.compat import (lrange, lmap, u, string_types, iteritems, raise_with_traceback, binary_type) -from pandas.core import common as com from pandas import Series from pandas.core.common import AbstractMethodError from pandas.formats.printing import pprint_thing @@ -107,7 +107,7 @@ def _get_skiprows(skiprows): """ if isinstance(skiprows, slice): return lrange(skiprows.start or 0, skiprows.stop, skiprows.step or 1) - elif isinstance(skiprows, numbers.Integral) or com.is_list_like(skiprows): + elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows): return skiprows elif skiprows is None: return 0 diff --git a/pandas/io/packers.py b/pandas/io/packers.py index ff06a5f212f8b..14e2c9b371296 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -47,6 +47,10 @@ import numpy as np from pandas import compat from pandas.compat import u, u_safe + +from pandas.types.common import (is_categorical_dtype, is_object_dtype, + needs_i8_conversion, pandas_dtype) + from pandas import (Timestamp, Period, Series, DataFrame, # noqa Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, @@ -55,9 +59,7 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame -from pandas.core.common import (PerformanceWarning, - is_categorical_dtype, is_object_dtype, - needs_i8_conversion, pandas_dtype) +from pandas.core.common import PerformanceWarning from pandas.io.common import get_filepath_or_buffer from pandas.core.internals import BlockManager, make_block import pandas.core.internals as internals diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index dc9455289b757..84ea2a92b8026 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2,20 +2,22 @@ Module contains tools for processing files into DataFrames or other objects """ from __future__ import print_function -from pandas.compat import (range, lrange, StringIO, lzip, zip, - string_types, map, OrderedDict) -from pandas import compat from collections import defaultdict import re import csv import warnings +import datetime import numpy as np +from pandas import compat +from pandas.compat import range, lrange, StringIO, lzip, zip, string_types, map +from pandas.types.common import (is_integer, _ensure_object, + is_list_like, is_integer_dtype, + is_float, + is_scalar) from pandas.core.index import Index, MultiIndex from pandas.core.frame import DataFrame -import datetime -import pandas.core.common as com from pandas.core.common import AbstractMethodError from pandas.core.config import get_option from pandas.io.date_converters import generic_parser @@ -326,11 +328,11 @@ def _validate_nrows(nrows): msg = "'nrows' must be an integer" if nrows is not None: - if com.is_float(nrows): + if is_float(nrows): if 
int(nrows) != nrows: raise ValueError(msg) nrows = int(nrows) - elif not com.is_integer(nrows): + elif not is_integer(nrows): raise ValueError(msg) return nrows @@ -869,7 +871,7 @@ def _clean_options(self, options, engine): # handle skiprows; this is internally handled by the # c-engine, so only need for python parsers if engine != 'c': - if com.is_integer(skiprows): + if is_integer(skiprows): skiprows = lrange(skiprows) skiprows = set() if skiprows is None else set(skiprows) @@ -961,7 +963,7 @@ def _validate_parse_dates_arg(parse_dates): "for the 'parse_dates' parameter") if parse_dates is not None: - if lib.isscalar(parse_dates): + if is_scalar(parse_dates): if not lib.is_bool(parse_dates): raise TypeError(msg) @@ -1021,8 +1023,8 @@ def __init__(self, kwds): is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray)) if not (is_sequence and - all(map(com.is_integer, self.index_col)) or - com.is_integer(self.index_col)): + all(map(is_integer, self.index_col)) or + is_integer(self.index_col)): raise ValueError("index_col must only contain row numbers " "when specifying a multi-index header") @@ -1047,7 +1049,7 @@ def _should_parse_dates(self, i): name = self.index_names[i] j = self.index_col[i] - if lib.isscalar(self.parse_dates): + if is_scalar(self.parse_dates): return (j == self.parse_dates) or (name == self.parse_dates) else: return (j in self.parse_dates) or (name in self.parse_dates) @@ -1281,7 +1283,7 @@ def _convert_types(self, values, na_values, try_num_bool=True): mask = lib.ismember(values, na_values) na_count = mask.sum() if na_count > 0: - if com.is_integer_dtype(values): + if is_integer_dtype(values): values = values.astype(np.float64) np.putmask(values, mask, np.nan) return values, na_count @@ -1407,10 +1409,10 @@ def _set_noconvert_columns(self): usecols = self.usecols def _set(x): - if usecols and com.is_integer(x): + if usecols and is_integer(x): x = list(usecols)[x] - if not com.is_integer(x): + if not is_integer(x): x = names.index(x) self._reader.set_noconvert(x) @@ -1790,7 +1792,7 @@ def _set_no_thousands_columns(self): noconvert_columns = set() def _set(x): - if com.is_integer(x): + if is_integer(x): noconvert_columns.add(x) else: noconvert_columns.add(self.columns.index(x)) @@ -1954,7 +1956,7 @@ def _convert_data(self, data): def _to_recarray(self, data, columns): dtypes = [] - o = OrderedDict() + o = compat.OrderedDict() # use the columns to "order" the keys # in the unordered 'data' dictionary @@ -2439,7 +2441,7 @@ def converter(*date_cols): try: return tools._to_datetime( - com._ensure_object(strs), + _ensure_object(strs), utc=None, box=False, dayfirst=dayfirst, @@ -2492,7 +2494,7 @@ def _isindex(colspec): if isinstance(parse_spec, list): # list of column lists for colspec in parse_spec: - if lib.isscalar(colspec): + if is_scalar(colspec): if isinstance(colspec, int) and colspec not in data_dict: colspec = orig_names[colspec] if _isindex(colspec): @@ -2569,7 +2571,7 @@ def _clean_na_values(na_values, keep_default_na=True): (k, _floatify_na_values(v)) for k, v in na_values.items() # noqa ]) else: - if not com.is_list_like(na_values): + if not is_list_like(na_values): na_values = [na_values] na_values = _stringify_na_values(na_values) if keep_default_na: @@ -2622,7 +2624,7 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): if not isinstance(dtype, dict): dtype = defaultdict(lambda: dtype) # Convert column indexes to column names. 
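The conversion in the next hunk maps positional (integer) dtype keys onto column names while passing named keys through unchanged. A small sketch with assumed inputs::

    from pandas.types.common import is_integer

    columns = ['a', 'b', 'c']
    dtype = {0: 'float64', 'b': 'int64'}
    resolved = dict((columns[k] if is_integer(k) else k, v)
                    for k, v in dtype.items())
    # resolved == {'a': 'float64', 'b': 'int64'}
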
- dtype = dict((columns[k] if com.is_integer(k) else k, v) + dtype = dict((columns[k] if is_integer(k) else k, v) for k, v in compat.iteritems(dtype)) if index_col is None or index_col is False: diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index c19dae7f3545e..2358c296f782e 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -3,7 +3,7 @@ import numpy as np from numpy.lib.format import read_array, write_array from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3 -import pandas.core.common as com +from pandas.types.common import is_datetime64_dtype, _NS_DTYPE def to_pickle(obj, path): @@ -86,7 +86,7 @@ def _unpickle_array(bytes): # All datetimes should be stored as M8[ns]. When unpickling with # numpy1.6, it will read these as M8[us]. So this ensures all # datetime64 types are read as MS[ns] - if com.is_datetime64_dtype(arr): - arr = arr.view(com._NS_DTYPE) + if is_datetime64_dtype(arr): + arr = arr.view(_NS_DTYPE) return arr diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d4ca717ddbc4e..038ca7ac7775b 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -12,11 +12,21 @@ import warnings import os +from pandas.types.common import (is_list_like, + is_categorical_dtype, + is_timedelta64_dtype, + is_datetime64tz_dtype, + is_datetime64_dtype, + _ensure_object, + _ensure_int64, + _ensure_platform_int) +from pandas.types.missing import array_equivalent + import numpy as np import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index) + MultiIndex, Int64Index, isnull) from pandas.core import config from pandas.io.common import _stringify_path from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel @@ -32,7 +42,6 @@ _block2d_to_blocknd, _factor_indexer, _block_shape) from pandas.core.index import _ensure_index -import pandas.core.common as com from pandas.tools.merge import concat from pandas import compat from pandas.compat import u_safe as u, PY3, range, lrange, string_types, filter @@ -1677,7 +1686,7 @@ def validate_metadata(self, handler): new_metadata = self.metadata cur_metadata = handler.read_metadata(self.cname) if new_metadata is not None and cur_metadata is not None \ - and not com.array_equivalent(new_metadata, cur_metadata): + and not array_equivalent(new_metadata, cur_metadata): raise ValueError("cannot append a categorical with " "different categories to the existing") @@ -2566,7 +2575,7 @@ def write_array(self, key, value, items=None): empty_array = self._is_empty_array(value.shape) transposed = False - if com.is_categorical_dtype(value): + if is_categorical_dtype(value): raise NotImplementedError('Cannot store a category dtype in ' 'a HDF5 dataset that uses format=' '"fixed". 
Use format="table".') @@ -2621,12 +2630,12 @@ def write_array(self, key, value, items=None): if empty_array: self.write_array_empty(key, value) else: - if com.is_datetime64_dtype(value.dtype): + if is_datetime64_dtype(value.dtype): self._handle.create_array( self.group, key, value.view('i8')) getattr( self.group, key)._v_attrs.value_type = 'datetime64' - elif com.is_datetime64tz_dtype(value.dtype): + elif is_datetime64tz_dtype(value.dtype): # store as UTC # with a zone self._handle.create_array(self.group, key, @@ -2635,7 +2644,7 @@ def write_array(self, key, value, items=None): node = getattr(self.group, key) node._v_attrs.tz = _get_tz(value.tz) node._v_attrs.value_type = 'datetime64' - elif com.is_timedelta64_dtype(value.dtype): + elif is_timedelta64_dtype(value.dtype): self._handle.create_array( self.group, key, value.view('i8')) getattr( @@ -3756,8 +3765,8 @@ def read(self, where=None, columns=None, **kwargs): if len(unique(key)) == len(key): sorter, _ = algos.groupsort_indexer( - com._ensure_int64(key), np.prod(N)) - sorter = com._ensure_platform_int(sorter) + _ensure_int64(key), np.prod(N)) + sorter = _ensure_platform_int(sorter) # create the objs for c in self.values_axes: @@ -3802,7 +3811,7 @@ def read(self, where=None, columns=None, **kwargs): unique_tuples = _asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) - indexer = com._ensure_platform_int(indexer) + indexer = _ensure_platform_int(indexer) new_index = long_index.take(indexer) new_values = lp.values.take(indexer, axis=0) @@ -3903,7 +3912,7 @@ def write_data(self, chunksize, dropna=False): # figure the mask: only do if we can successfully process this # column, otherwise ignore the mask - mask = com.isnull(a.data).all(axis=0) + mask = isnull(a.data).all(axis=0) if isinstance(mask, np.ndarray): masks.append(mask.astype('u1', copy=False)) @@ -4522,7 +4531,7 @@ def _convert_string_array(data, encoding, itemsize=None): # create the sized dtype if itemsize is None: - itemsize = lib.max_len_string_array(com._ensure_object(data.ravel())) + itemsize = lib.max_len_string_array(_ensure_object(data.ravel())) data = np.asarray(data, dtype="S%d" % itemsize) return data @@ -4551,7 +4560,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): encoding = _ensure_encoding(encoding) if encoding is not None and len(data): - itemsize = lib.max_len_string_array(com._ensure_object(data)) + itemsize = lib.max_len_string_array(_ensure_object(data)) if compat.PY3: dtype = "U{0}".format(itemsize) else: @@ -4619,7 +4628,7 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs): self.terms = None self.coordinates = None - if com.is_list_like(where): + if is_list_like(where): # see if we have a passed coordinate like try: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 324988360c9fe..8485a3f13f047 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -13,13 +13,15 @@ import numpy as np import pandas.lib as lib -import pandas.core.common as com +from pandas.types.missing import isnull +from pandas.types.dtypes import DatetimeTZDtype +from pandas.types.common import (is_list_like, + is_datetime64tz_dtype) + from pandas.compat import (lzip, map, zip, raise_with_traceback, string_types, text_type) from pandas.core.api import DataFrame, Series -from pandas.core.common import isnull from pandas.core.base import PandasObject -from pandas.types.api import DatetimeTZDtype from pandas.tseries.tools import to_datetime from contextlib import contextmanager @@ -90,7 +92,7 @@ def 
_handle_date_column(col, format=None): # parse dates as timestamp format = 's' if format is None else format return to_datetime(col, errors='coerce', unit=format, utc=True) - elif com.is_datetime64tz_dtype(col): + elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 return (to_datetime(col, errors='coerce') @@ -123,7 +125,7 @@ def _parse_date_columns(data_frame, parse_dates): # we could in theory do a 'nice' conversion from a FixedOffset tz # GH11216 for col_name, df_col in data_frame.iteritems(): - if com.is_datetime64tz_dtype(df_col): + if is_datetime64tz_dtype(df_col): data_frame[col_name] = _handle_date_column(df_col) return data_frame @@ -876,7 +878,7 @@ def _create_table_setup(self): for name, typ, is_index in column_names_and_types] if self.keys is not None: - if not com.is_list_like(self.keys): + if not is_list_like(self.keys): keys = [self.keys] else: keys = self.keys @@ -1465,7 +1467,7 @@ def _create_table_setup(self): for cname, ctype, _ in column_names_and_types] if self.keys is not None and len(self.keys): - if not com.is_list_like(self.keys): + if not is_list_like(self.keys): keys = [self.keys] else: keys = self.keys diff --git a/pandas/io/stata.py b/pandas/io/stata.py index c7390cf240f8a..bd19102c7f18c 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -14,6 +14,10 @@ import sys import struct from dateutil.relativedelta import relativedelta + +from pandas.types.common import (is_categorical_dtype, is_datetime64_dtype, + _ensure_object) + from pandas.core.base import StringMixin from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame @@ -24,7 +28,7 @@ zip, BytesIO from pandas.util.decorators import Appender import pandas as pd -import pandas.core.common as com + from pandas.io.common import get_filepath_or_buffer, BaseIterator from pandas.lib import max_len_string_array, infer_dtype from pandas.tslib import NaT, Timestamp @@ -358,7 +362,7 @@ def _datetime_to_stata_elapsed_vec(dates, fmt): def parse_dates_safe(dates, delta=False, year=False, days=False): d = {} - if com.is_datetime64_dtype(dates.values): + if is_datetime64_dtype(dates.values): if delta: delta = dates - stata_epoch d['delta'] = delta.values.astype( @@ -396,7 +400,7 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): index = dates.index if bad_loc.any(): dates = Series(dates) - if com.is_datetime64_dtype(dates): + if is_datetime64_dtype(dates): dates[bad_loc] = to_datetime(stata_epoch) else: dates[bad_loc] = stata_epoch @@ -1746,7 +1750,7 @@ def _dtype_to_stata_type(dtype, column): elif dtype.type == np.object_: # try to coerce it to the biggest string # not memory efficient, what else could we # do? 
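For context on the sizing logic below: the widest string in an object column determines the Stata string type, hence the scan over the entire column. With illustrative values::

    import numpy as np
    from pandas.types.common import _ensure_object
    from pandas.lib import max_len_string_array

    col = np.array(['a', 'abc', 'ab'], dtype=object)
    itemsize = max_len_string_array(_ensure_object(col))  # 3
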
- itemsize = max_len_string_array(com._ensure_object(column.values)) + itemsize = max_len_string_array(_ensure_object(column.values)) return chr(max(itemsize, 1)) elif dtype == np.float64: return chr(255) @@ -1784,7 +1788,7 @@ def _dtype_to_default_stata_fmt(dtype, column): if not (inferred_dtype in ('string', 'unicode') or len(column) == 0): raise ValueError('Writing general object arrays is not supported') - itemsize = max_len_string_array(com._ensure_object(column.values)) + itemsize = max_len_string_array(_ensure_object(column.values)) if itemsize > 244: raise ValueError(excessive_string_length_error % column.name) return "%" + str(max(itemsize, 1)) + "s" @@ -1880,7 +1884,7 @@ def _prepare_categoricals(self, data): """Check for categorical columns, retain categorical information for Stata file and convert categorical data to int""" - is_cat = [com.is_categorical_dtype(data[col]) for col in data] + is_cat = [is_categorical_dtype(data[col]) for col in data] self._is_col_cat = is_cat self._value_labels = [] if not any(is_cat): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 9a995c17f0445..e5a49c5213a48 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -31,11 +31,12 @@ from datetime import datetime, date, time +from pandas.types.common import (is_object_dtype, is_datetime64_dtype, + is_datetime64tz_dtype) from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat from pandas import date_range, to_datetime, to_timedelta, Timestamp import pandas.compat as compat from pandas.compat import StringIO, range, lrange, string_types -from pandas.core import common as com from pandas.core.datetools import format as date_format import pandas.io.sql as sql @@ -1275,7 +1276,7 @@ def test_datetime_with_timezone(self): def check(col): # check that a column is either datetime64[ns] # or datetime64[ns, UTC] - if com.is_datetime64_dtype(col.dtype): + if is_datetime64_dtype(col.dtype): # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" @@ -1285,7 +1286,7 @@ def check(col): # "2000-06-01 07:00:00" self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00')) - elif com.is_datetime64tz_dtype(col.dtype): + elif is_datetime64tz_dtype(col.dtype): self.assertTrue(str(col.dt.tz) == 'UTC') # "2000-01-01 00:00:00-08:00" should convert to @@ -1311,9 +1312,9 @@ def check(col): # even with the same versions of psycopg2 & sqlalchemy, possibly a # Postgrsql server version difference col = df.DateColWithTz - self.assertTrue(com.is_object_dtype(col.dtype) or - com.is_datetime64_dtype(col.dtype) or - com.is_datetime64tz_dtype(col.dtype), + self.assertTrue(is_object_dtype(col.dtype) or + is_datetime64_dtype(col.dtype) or + is_datetime64tz_dtype(col.dtype), "DateCol loaded with incorrect type -> {0}" .format(col.dtype)) @@ -1327,7 +1328,7 @@ def check(col): self.conn, chunksize=1)), ignore_index=True) col = df.DateColWithTz - self.assertTrue(com.is_datetime64tz_dtype(col.dtype), + self.assertTrue(is_datetime64tz_dtype(col.dtype), "DateCol loaded with incorrect type -> {0}" .format(col.dtype)) self.assertTrue(str(col.dt.tz) == 'UTC') diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 830c68d62efad..5f45d1b547e62 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -15,7 +15,7 @@ import pandas as pd from pandas.compat import iterkeys from pandas.core.frame import DataFrame, Series -from pandas.core.common import is_categorical_dtype +from pandas.types.common import is_categorical_dtype 
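The import swap above is behavior-preserving; a quick check (hypothetical session) that the relocated function still recognizes categorical data the way these tests expect::

    import pandas as pd
    from pandas.types.common import is_categorical_dtype

    s = pd.Series(list('abca'), dtype='category')
    assert is_categorical_dtype(s) and is_categorical_dtype(s.dtype)
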
from pandas.io.parsers import read_csv from pandas.io.stata import (read_stata, StataReader, InvalidColumnName, PossiblePrecisionLoss, StataMissingValue) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 0312fb023f7fd..35233d1b6ba94 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -15,6 +15,14 @@ from pandas.compat import range from pandas.compat.numpy import function as nv +from pandas.types.generic import ABCSparseArray, ABCSparseSeries +from pandas.types.common import (is_float, is_integer, + is_integer_dtype, _ensure_platform_int, + is_list_like, + is_scalar) +from pandas.types.cast import _possibly_convert_platform +from pandas.types.missing import isnull, notnull + from pandas._sparse import SparseIndex, BlockIndex, IntIndex import pandas._sparse as splib import pandas.index as _index @@ -40,13 +48,13 @@ def wrapper(self, other): if len(self) != len(other): raise AssertionError("length mismatch: %d vs. %d" % (len(self), len(other))) - if not isinstance(other, com.ABCSparseArray): + if not isinstance(other, ABCSparseArray): other = SparseArray(other, fill_value=self.fill_value) if name[0] == 'r': return _sparse_array_op(other, self, op, name[1:]) else: return _sparse_array_op(self, other, op, name) - elif lib.isscalar(other): + elif is_scalar(other): new_fill_value = op(np.float64(self.fill_value), np.float64(other)) return _wrap_result(name, op(self.sp_values, other), @@ -120,7 +128,7 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer', if index is not None: if data is None: data = np.nan - if not lib.isscalar(data): + if not is_scalar(data): raise Exception("must only pass scalars with an index ") values = np.empty(len(index), dtype='float64') values.fill(data) @@ -177,7 +185,7 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer', @classmethod def _simple_new(cls, data, sp_index, fill_value): - if (com.is_integer_dtype(data) and com.is_float(fill_value) and + if (is_integer_dtype(data) and is_float(fill_value) and sp_index.ngaps > 0): # if float fill_value is being included in dense repr, # convert values to float @@ -288,7 +296,7 @@ def __getitem__(self, key): """ """ - if com.is_integer(key): + if is_integer(key): return self._get_val_at(key) elif isinstance(key, tuple): data_slice = self.values[key] @@ -340,11 +348,11 @@ def take(self, indices, axis=0, allow_fill=True, if axis: raise ValueError("axis must be 0, input was {0}".format(axis)) - if com.is_integer(indices): + if is_integer(indices): # return scalar return self[indices] - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) n = len(self) if allow_fill and fill_value is not None: # allow -1 to indicate self.fill_value, @@ -380,7 +388,7 @@ def take(self, indices, axis=0, allow_fill=True, return self._simple_new(new_values, sp_index, self.fill_value) def __setitem__(self, key, value): - # if com.is_integer(key): + # if is_integer(key): # self.values[key] = value # else: # raise Exception("SparseArray does not support seting non-scalars @@ -395,7 +403,7 @@ def __setslice__(self, i, j, value): j = 0 slobj = slice(i, j) # noqa - # if not lib.isscalar(value): + # if not is_scalar(value): # raise Exception("SparseArray does not support seting non-scalars # via slices") @@ -445,12 +453,12 @@ def count(self): @property def _null_fill_value(self): - return com.isnull(self.fill_value) + return isnull(self.fill_value) @property def _valid_sp_values(self): sp_vals = self.sp_values - mask = com.notnull(sp_vals) + mask = 
notnull(sp_vals) return sp_vals[mask] @Appender(_index_shared_docs['fillna'] % _sparray_doc_kwargs) @@ -466,7 +474,7 @@ def fillna(self, value, downcast=None): fill_value=value) else: new_values = self.sp_values.copy() - new_values[com.isnull(new_values)] = value + new_values[isnull(new_values)] = value return self._simple_new(new_values, self.sp_index, fill_value=self.fill_value) @@ -498,7 +506,7 @@ def cumsum(self, axis=0, *args, **kwargs): nv.validate_cumsum(args, kwargs) # TODO: gh-12855 - return a SparseArray here - if com.notnull(self.fill_value): + if notnull(self.fill_value): return self.to_dense().cumsum() # TODO: what if sp_values contains NaN?? @@ -569,7 +577,7 @@ def _maybe_to_dense(obj): def _maybe_to_sparse(array): - if isinstance(array, com.ABCSparseSeries): + if isinstance(array, ABCSparseSeries): array = SparseArray(array.values, sparse_index=array.sp_index, fill_value=array.fill_value, copy=True) if not isinstance(array, SparseArray): @@ -588,15 +596,15 @@ def _sanitize_values(arr): else: # scalar - if lib.isscalar(arr): + if is_scalar(arr): arr = [arr] # ndarray if isinstance(arr, np.ndarray): pass - elif com.is_list_like(arr) and len(arr) > 0: - arr = com._possibly_convert_platform(arr) + elif is_list_like(arr) and len(arr) > 0: + arr = _possibly_convert_platform(arr) else: arr = np.asarray(arr) @@ -624,8 +632,8 @@ def make_sparse(arr, kind='block', fill_value=nan): if arr.ndim > 1: raise TypeError("expected dimension <= 1 data") - if com.isnull(fill_value): - mask = com.notnull(arr) + if isnull(fill_value): + mask = notnull(arr) else: mask = arr != fill_value diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 52a6e6edf0896..811d8019c7fee 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -10,13 +10,15 @@ from pandas import compat import numpy as np +from pandas.types.missing import isnull, notnull +from pandas.types.common import _ensure_platform_int + +from pandas.core.common import _try_sort from pandas.compat.numpy import function as nv -from pandas.core.common import isnull, _try_sort from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.series import Series from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray, _default_index) -import pandas.core.common as com import pandas.core.algorithms as algos from pandas.core.internals import (BlockManager, create_block_manager_from_arrays) @@ -520,7 +522,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, return SparseDataFrame(index=index, columns=self.columns) indexer = self.index.get_indexer(index, method, limit=limit) - indexer = com._ensure_platform_int(indexer) + indexer = _ensure_platform_int(indexer) mask = indexer == -1 need_mask = mask.any() @@ -546,7 +548,7 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None, if level is not None: raise TypeError('Reindex by level not supported for sparse') - if com.notnull(fill_value): + if notnull(fill_value): raise NotImplementedError("'fill_value' argument is not supported") if limit: diff --git a/pandas/sparse/list.py b/pandas/sparse/list.py index bc10b73a47723..666dae8071053 100644 --- a/pandas/sparse/list.py +++ b/pandas/sparse/list.py @@ -2,9 +2,9 @@ from pandas.core.base import PandasObject from pandas.formats.printing import pprint_thing +from pandas.types.common import is_scalar from pandas.sparse.array import SparseArray import pandas._sparse as splib -import pandas.lib as lib class SparseList(PandasObject): @@ -121,7 +121,7 @@ def append(self, 
value): ---------- value: scalar or array-like """ - if lib.isscalar(value): + if is_scalar(value): value = [value] sparr = SparseArray(value, fill_value=self.fill_value) diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 88f396d20a91e..0996cd3bd826a 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -10,6 +10,7 @@ from pandas import compat import numpy as np +from pandas.types.common import is_list_like, is_scalar from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.frame import DataFrame from pandas.core.panel import Panel @@ -18,7 +19,6 @@ import pandas.core.common as com import pandas.core.ops as ops -import pandas.lib as lib class SparsePanelAxis(object): @@ -186,7 +186,7 @@ def _ixs(self, i, axis=0): key = self._get_axis(axis)[i] # xs cannot handle a non-scalar key, so just reindex here - if com.is_list_like(key): + if is_list_like(key): return self.reindex(**{self._get_axis_name(axis): key}) return self.xs(key, axis=axis) @@ -393,7 +393,7 @@ def _combine(self, other, func, axis=0): return self._combineFrame(other, func, axis=axis) elif isinstance(other, Panel): return self._combinePanel(other, func) - elif lib.isscalar(other): + elif is_scalar(other): new_frames = dict((k, func(v, other)) for k, v in self.iteritems()) return self._new_like(new_frames) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 5c7762c56ec6d..951c2ae0c0d5a 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -8,8 +8,11 @@ import numpy as np import warnings +from pandas.types.missing import isnull +from pandas.types.common import is_scalar +from pandas.core.common import _values_from_object, _maybe_match_name + from pandas.compat.numpy import function as nv -from pandas.core.common import isnull, _values_from_object, _maybe_match_name from pandas.core.index import Index, _ensure_index, InvalidIndexError from pandas.core.series import Series from pandas.core.frame import DataFrame @@ -18,7 +21,6 @@ import pandas.core.common as com import pandas.core.ops as ops import pandas.index as _index -import pandas.lib as lib from pandas.util.decorators import Appender from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray, @@ -54,7 +56,7 @@ def wrapper(self, other): return _sparse_series_op(self, other, op, name) elif isinstance(other, DataFrame): return NotImplemented - elif lib.isscalar(other): + elif is_scalar(other): if isnull(other) or isnull(self.fill_value): new_fill_value = np.nan else: diff --git a/pandas/src/testing.pyx b/pandas/src/testing.pyx index 6780cf311c244..e9563d9168206 100644 --- a/pandas/src/testing.pyx +++ b/pandas/src/testing.pyx @@ -1,7 +1,8 @@ import numpy as np from pandas import compat -from pandas.core.common import isnull, array_equivalent, is_dtype_equal +from pandas.types.missing import isnull, array_equivalent +from pandas.types.common import is_dtype_equal cdef NUMERIC_TYPES = ( bool, @@ -145,8 +146,15 @@ cpdef assert_almost_equal(a, b, if na != nb: from pandas.util.testing import raise_assert_detail + + # if we have a small diff set, print it + if abs(na-nb) < 10: + r = list(set(a) ^ set(b)) + else: + r = None + raise_assert_detail(obj, '{0} length are different'.format(obj), - na, nb) + na, nb, r) for i in xrange(len(a)): try: diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 46d30ab7fe313..bb475e47206c2 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -6,7 +6,7 @@ import warnings import numpy as np -from pandas import lib +from 
pandas.types.common import is_scalar from pandas.core.api import DataFrame, Series from pandas.util.decorators import Substitution, Appender @@ -226,7 +226,7 @@ def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): aargs += ',' def f(a, b): - if lib.isscalar(b): + if is_scalar(b): return "{a}={b}".format(a=a, b=b) return "{a}=<{b}>".format(a=a, b=type(b).__name__) aargs = ','.join([f(a, b) for a, b in kwds.items() if b is not None]) diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index 678689f2d2b30..b533d255bd196 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -13,7 +13,7 @@ from pandas.core.api import DataFrame, Series, isnull from pandas.core.base import StringMixin -from pandas.core.common import _ensure_float64 +from pandas.types.common import _ensure_float64 from pandas.core.index import MultiIndex from pandas.core.panel import Panel from pandas.util.decorators import cache_readonly diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 2b619b84a5994..020b7f1f1ab9d 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -10,7 +10,7 @@ from pandas import (notnull, DataFrame, Series, MultiIndex, date_range, Timestamp, compat) import pandas as pd -import pandas.core.common as com +from pandas.types.dtypes import CategoricalDtype from pandas.util.testing import (assert_series_equal, assert_frame_equal) import pandas.util.testing as tm @@ -45,8 +45,8 @@ def test_apply(self): 'c1': ['C', 'C', 'D', 'D']}) df = df.apply(lambda ts: ts.astype('category')) self.assertEqual(df.shape, (4, 2)) - self.assertTrue(isinstance(df['c0'].dtype, com.CategoricalDtype)) - self.assertTrue(isinstance(df['c1'].dtype, com.CategoricalDtype)) + self.assertTrue(isinstance(df['c0'].dtype, CategoricalDtype)) + self.assertTrue(isinstance(df['c1'].dtype, CategoricalDtype)) def test_apply_mixed_datetimelike(self): # mixed datetimelike diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b42aef9447373..d21db5ba52a45 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -14,6 +14,7 @@ import numpy.ma as ma import numpy.ma.mrecords as mrecords +from pandas.types.common import is_integer_dtype from pandas.compat import (lmap, long, zip, range, lrange, lzip, OrderedDict, is_platform_little_endian) from pandas import compat @@ -809,7 +810,7 @@ def test_constructor_list_of_lists(self): # GH #484 l = [[1, 'a'], [2, 'b']] df = DataFrame(data=l, columns=["num", "str"]) - self.assertTrue(com.is_integer_dtype(df['num'])) + self.assertTrue(is_integer_dtype(df['num'])) self.assertEqual(df['str'].dtype, np.object_) # GH 4851 diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 5f95ff6b6b601..c650436eefaf3 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -1,15 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import print_function - from datetime import timedelta import numpy as np - from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp, compat, option_context) from pandas.compat import u -from pandas.core import common as com +from pandas.types.dtypes import DatetimeTZDtype from pandas.tests.frame.common import TestData from pandas.util.testing import (assert_series_equal, assert_frame_equal, @@ -84,8 +82,8 @@ def test_datetime_with_tz_dtypes(self): tzframe.iloc[1, 2] = pd.NaT result = tzframe.dtypes.sort_index() expected = 
Series([np.dtype('datetime64[ns]'), - com.DatetimeTZDtype('datetime64[ns, US/Eastern]'), - com.DatetimeTZDtype('datetime64[ns, CET]')], + DatetimeTZDtype('datetime64[ns, US/Eastern]'), + DatetimeTZDtype('datetime64[ns, CET]')], ['A', 'B', 'C']) assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index d7fed8131a4f4..578df5ba9101e 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -17,6 +17,9 @@ date_range) import pandas as pd +from pandas.types.common import (is_float_dtype, + is_integer, + is_scalar) from pandas.util.testing import (assert_almost_equal, assert_numpy_array_equal, assert_series_equal, @@ -26,7 +29,6 @@ from pandas.core.indexing import IndexingError import pandas.util.testing as tm -import pandas.lib as lib from pandas.tests.frame.common import TestData @@ -1419,15 +1421,15 @@ def test_setitem_single_column_mixed_datetime(self): # set an allowable datetime64 type from pandas import tslib df.ix['b', 'timestamp'] = tslib.iNaT - self.assertTrue(com.isnull(df.ix['b', 'timestamp'])) + self.assertTrue(isnull(df.ix['b', 'timestamp'])) # allow this syntax df.ix['c', 'timestamp'] = nan - self.assertTrue(com.isnull(df.ix['c', 'timestamp'])) + self.assertTrue(isnull(df.ix['c', 'timestamp'])) # allow this syntax df.ix['d', :] = nan - self.assertTrue(com.isnull(df.ix['c', :]).all() == False) # noqa + self.assertTrue(isnull(df.ix['c', :]).all() == False) # noqa # as of GH 3216 this will now work! # try to set with a list like item @@ -1619,7 +1621,7 @@ def test_set_value_resize(self): res = self.frame.copy() res3 = res.set_value('foobar', 'baz', 5) - self.assertTrue(com.is_float_dtype(res3['baz'])) + self.assertTrue(is_float_dtype(res3['baz'])) self.assertTrue(isnull(res3['baz'].drop(['foobar'])).all()) self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') @@ -1662,7 +1664,7 @@ def test_single_element_ix_dont_upcast(self): (int, np.integer))) result = self.frame.ix[self.frame.index[5], 'E'] - self.assertTrue(com.is_integer(result)) + self.assertTrue(is_integer(result)) def test_irow(self): df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2)) @@ -2268,7 +2270,7 @@ def _check_align(df, cond, other, check_dtypes=True): d = df[k].values c = cond[k].reindex(df[k].index).fillna(False).values - if lib.isscalar(other): + if is_scalar(other): o = other else: if isinstance(other, np.ndarray): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index a6246790f83cb..44c7f2277293d 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -6,6 +6,9 @@ import warnings from datetime import datetime +from pandas.types.common import (is_integer_dtype, + is_float_dtype, + is_scalar) from pandas.compat import range, lrange, lzip, StringIO, lmap, map from pandas.tslib import NaT from numpy import nan @@ -22,7 +25,7 @@ assert_frame_equal, assert_panel_equal, assert_attr_equal, slow) from pandas.formats.printing import pprint_thing -from pandas import concat, lib +from pandas import concat from pandas.core.common import PerformanceWarning import pandas.util.testing as tm @@ -200,7 +203,7 @@ def _print(result, error=None): return try: - if lib.isscalar(rs) and lib.isscalar(xp): + if is_scalar(rs) and is_scalar(xp): self.assertEqual(rs, xp) elif xp.ndim == 1: assert_series_equal(rs, xp) @@ -775,7 +778,7 @@ def test_ix_loc_consistency(self): # this is not an exhaustive case def compare(result, 
expected): - if lib.isscalar(expected): + if is_scalar(expected): self.assertEqual(result, expected) else: self.assertTrue(expected.equals(result)) @@ -2888,8 +2891,8 @@ def test_setitem_dtype_upcast(self): columns=['foo', 'bar', 'baz']) assert_frame_equal(left, right) - self.assertTrue(com.is_integer_dtype(left['foo'])) - self.assertTrue(com.is_integer_dtype(left['baz'])) + self.assertTrue(is_integer_dtype(left['foo'])) + self.assertTrue(is_integer_dtype(left['baz'])) left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0, index=list('ab'), @@ -2900,8 +2903,8 @@ def test_setitem_dtype_upcast(self): columns=['foo', 'bar', 'baz']) assert_frame_equal(left, right) - self.assertTrue(com.is_float_dtype(left['foo'])) - self.assertTrue(com.is_float_dtype(left['baz'])) + self.assertTrue(is_float_dtype(left['foo'])) + self.assertTrue(is_float_dtype(left['baz'])) def test_setitem_iloc(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2a7e8a957977f..b7ec4d570f18b 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -8,10 +8,11 @@ import numpy.ma as ma import pandas as pd +from pandas.types.common import is_categorical_dtype, is_datetime64tz_dtype from pandas import Index, Series, isnull, date_range, period_range from pandas.core.index import MultiIndex from pandas.tseries.index import Timestamp, DatetimeIndex -import pandas.core.common as com + import pandas.lib as lib from pandas.compat import lrange, range, zip, OrderedDict, long @@ -144,11 +145,11 @@ def test_constructor_categorical(self): ValueError, lambda: Series(pd.Categorical([1, 2, 3]), dtype='int64')) cat = Series(pd.Categorical([1, 2, 3]), dtype='category') - self.assertTrue(com.is_categorical_dtype(cat)) - self.assertTrue(com.is_categorical_dtype(cat.dtype)) + self.assertTrue(is_categorical_dtype(cat)) + self.assertTrue(is_categorical_dtype(cat.dtype)) s = Series([1, 2, 3], dtype='category') - self.assertTrue(com.is_categorical_dtype(s)) - self.assertTrue(com.is_categorical_dtype(s.dtype)) + self.assertTrue(is_categorical_dtype(s)) + self.assertTrue(is_categorical_dtype(s.dtype)) def test_constructor_maskedarray(self): data = ma.masked_all((3, ), dtype=float) @@ -429,7 +430,7 @@ def test_constructor_with_datetime_tz(self): s = Series(dr) self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]') self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]') - self.assertTrue(com.is_datetime64tz_dtype(s.dtype)) + self.assertTrue(is_datetime64tz_dtype(s.dtype)) self.assertTrue('datetime64[ns, US/Eastern]' in str(s)) # export diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 6e82f81f901a9..c25895548dcb9 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd +from pandas.types.common import is_integer_dtype, is_list_like from pandas import (Index, Series, DataFrame, bdate_range, date_range, period_range, timedelta_range) from pandas.tseries.period import PeriodIndex @@ -49,16 +50,16 @@ def test_dt_namespace_accessor(self): def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): - if com.is_integer_dtype(result): + if is_integer_dtype(result): result = result.astype('int64') - elif not com.is_list_like(result): + elif not is_list_like(result): return result return Series(result, index=s.index, name=s.name) def 
compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) - if not (com.is_list_like(a) and com.is_list_like(b)): + if not (is_list_like(a) and is_list_like(b)): self.assertEqual(a, b) else: tm.assert_series_equal(a, b) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 15ca238ee32a0..64ebaa63cc10f 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -7,16 +7,14 @@ import numpy as np import pandas as pd +from pandas.types.common import is_integer, is_scalar from pandas import Index, Series, DataFrame, isnull, date_range from pandas.core.index import MultiIndex from pandas.core.indexing import IndexingError from pandas.tseries.index import Timestamp from pandas.tseries.tdi import Timedelta -import pandas.core.common as com import pandas.core.datetools as datetools -import pandas.lib as lib - from pandas.compat import lrange, range from pandas import compat from pandas.util.testing import assert_series_equal, assert_almost_equal @@ -375,7 +373,7 @@ def test_getitem_ambiguous_keyerror(self): def test_getitem_unordered_dup(self): obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b']) - self.assertTrue(lib.isscalar(obj['c'])) + self.assertTrue(is_scalar(obj['c'])) self.assertEqual(obj['c'], 0) def test_getitem_dups_with_missing(self): @@ -1174,23 +1172,23 @@ def test_where_numeric_with_string(self): s = pd.Series([1, 2, 3]) w = s.where(s > 1, 'X') - self.assertFalse(com.is_integer(w[0])) - self.assertTrue(com.is_integer(w[1])) - self.assertTrue(com.is_integer(w[2])) + self.assertFalse(is_integer(w[0])) + self.assertTrue(is_integer(w[1])) + self.assertTrue(is_integer(w[2])) self.assertTrue(isinstance(w[0], str)) self.assertTrue(w.dtype == 'object') w = s.where(s > 1, ['X', 'Y', 'Z']) - self.assertFalse(com.is_integer(w[0])) - self.assertTrue(com.is_integer(w[1])) - self.assertTrue(com.is_integer(w[2])) + self.assertFalse(is_integer(w[0])) + self.assertTrue(is_integer(w[1])) + self.assertTrue(is_integer(w[2])) self.assertTrue(isinstance(w[0], str)) self.assertTrue(w.dtype == 'object') w = s.where(s > 1, np.array(['X', 'Y', 'Z'])) - self.assertFalse(com.is_integer(w[0])) - self.assertTrue(com.is_integer(w[1])) - self.assertTrue(com.is_integer(w[2])) + self.assertFalse(is_integer(w[0])) + self.assertTrue(is_integer(w[1])) + self.assertTrue(is_integer(w[2])) self.assertTrue(isinstance(w[0], str)) self.assertTrue(w.dtype == 'object') diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index e0bff7fbd39e4..7d2517987e526 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -7,7 +7,7 @@ from pandas import (Index, Series, _np_version_under1p9) from pandas.tseries.index import Timestamp -import pandas.core.common as com +from pandas.types.common import is_integer import pandas.util.testing as tm from .common import TestData @@ -96,11 +96,11 @@ def test_quantile_interpolation_dtype(self): # interpolation = linear (default case) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower') self.assertEqual(q, percentile(np.array([1, 3, 4]), 50)) - self.assertTrue(com.is_integer(q)) + self.assertTrue(is_integer(q)) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher') self.assertEqual(q, percentile(np.array([1, 3, 4]), 50)) - self.assertTrue(com.is_integer(q)) + self.assertTrue(is_integer(q)) def test_quantile_interpolation_np_lt_1p9(self): # GH #10174 diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 
77ae3ca20d123..2721d8d0e5e69 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -9,7 +9,7 @@ import pandas as pd import pandas.compat as compat -import pandas.core.common as com +from pandas.types.common import is_object_dtype, is_datetimetz import pandas.util.testing as tm from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta) @@ -517,7 +517,7 @@ def test_value_counts_unique_nunique(self): continue # special assign to the numpy array - if com.is_datetimetz(o): + if is_datetimetz(o): if isinstance(o, DatetimeIndex): v = o.asi8 v[0:2] = pd.tslib.iNaT @@ -982,8 +982,8 @@ def test_memory_usage(self): res = o.memory_usage() res_deep = o.memory_usage(deep=True) - if (com.is_object_dtype(o) or (isinstance(o, Series) and - com.is_object_dtype(o.index))): + if (is_object_dtype(o) or (isinstance(o, Series) and + is_object_dtype(o.index))): # if there are objects, only deep will pick them up self.assertTrue(res_deep > res) else: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 90876a4541da6..2ca1fc71df20a 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -8,12 +8,17 @@ import numpy as np +from pandas.types.dtypes import CategoricalDtype +from pandas.types.common import (is_categorical_dtype, + is_object_dtype, + is_float_dtype, + is_integer_dtype) + import pandas as pd import pandas.compat as compat -import pandas.core.common as com import pandas.util.testing as tm from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex, - Timestamp, CategoricalIndex) + Timestamp, CategoricalIndex, isnull) from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context @@ -195,18 +200,18 @@ def f(): # This should result in integer categories, not float! cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - self.assertTrue(com.is_integer_dtype(cat.categories)) + self.assertTrue(is_integer_dtype(cat.categories)) # https://github.com/pydata/pandas/issues/3678 cat = pd.Categorical([np.nan, 1, 2, 3]) - self.assertTrue(com.is_integer_dtype(cat.categories)) + self.assertTrue(is_integer_dtype(cat.categories)) # this should result in floats cat = pd.Categorical([np.nan, 1, 2., 3]) - self.assertTrue(com.is_float_dtype(cat.categories)) + self.assertTrue(is_float_dtype(cat.categories)) cat = pd.Categorical([np.nan, 1., 2., 3.]) - self.assertTrue(com.is_float_dtype(cat.categories)) + self.assertTrue(is_float_dtype(cat.categories)) # Deprecating NaNs in categories (GH #10748) # preserve int as far as possible by converting to object if NaN is in @@ -214,23 +219,23 @@ def f(): with tm.assert_produces_warning(FutureWarning): cat = pd.Categorical([np.nan, 1, 2, 3], categories=[np.nan, 1, 2, 3]) - self.assertTrue(com.is_object_dtype(cat.categories)) + self.assertTrue(is_object_dtype(cat.categories)) # This doesn't work -> this would probably need some kind of "remember # the original type" feature to try to cast the array interface result # to...
# vals = np.asarray(cat[cat.notnull()]) - # self.assertTrue(com.is_integer_dtype(vals)) + # self.assertTrue(is_integer_dtype(vals)) with tm.assert_produces_warning(FutureWarning): cat = pd.Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"]) - self.assertTrue(com.is_object_dtype(cat.categories)) + self.assertTrue(is_object_dtype(cat.categories)) # but don't do it for floats with tm.assert_produces_warning(FutureWarning): cat = pd.Categorical([np.nan, 1., 2., 3.], categories=[np.nan, 1., 2., 3.]) - self.assertTrue(com.is_float_dtype(cat.categories)) + self.assertTrue(is_float_dtype(cat.categories)) # corner cases cat = pd.Categorical([1]) @@ -552,7 +557,7 @@ def test_na_flags_int_categories(self): cat = Categorical(labels, categories, fastpath=True) repr(cat) - self.assert_numpy_array_equal(com.isnull(cat), labels == -1) + self.assert_numpy_array_equal(isnull(cat), labels == -1) def test_categories_none(self): factor = Categorical(['a', 'b', 'b', 'a', @@ -2076,15 +2081,15 @@ def test_assignment_to_dataframe(self): result = df.dtypes expected = Series( - [np.dtype('int32'), com.CategoricalDtype()], index=['value', 'D']) + [np.dtype('int32'), CategoricalDtype()], index=['value', 'D']) tm.assert_series_equal(result, expected) df['E'] = s str(df) result = df.dtypes - expected = Series([np.dtype('int32'), com.CategoricalDtype(), - com.CategoricalDtype()], + expected = Series([np.dtype('int32'), CategoricalDtype(), + CategoricalDtype()], index=['value', 'D', 'E']) tm.assert_series_equal(result, expected) @@ -3234,7 +3239,7 @@ def test_slicing_and_getting_ops(self): # frame res_df = df.iloc[2:4, :] tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(com.is_categorical_dtype(res_df["cats"])) + self.assertTrue(is_categorical_dtype(res_df["cats"])) # row res_row = df.iloc[2, :] @@ -3244,7 +3249,7 @@ def test_slicing_and_getting_ops(self): # col res_col = df.iloc[:, 0] tm.assert_series_equal(res_col, exp_col) - self.assertTrue(com.is_categorical_dtype(res_col)) + self.assertTrue(is_categorical_dtype(res_col)) # single value res_val = df.iloc[2, 0] @@ -3254,7 +3259,7 @@ def test_slicing_and_getting_ops(self): # frame res_df = df.loc["j":"k", :] tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(com.is_categorical_dtype(res_df["cats"])) + self.assertTrue(is_categorical_dtype(res_df["cats"])) # row res_row = df.loc["j", :] @@ -3264,7 +3269,7 @@ def test_slicing_and_getting_ops(self): # col res_col = df.loc[:, "cats"] tm.assert_series_equal(res_col, exp_col) - self.assertTrue(com.is_categorical_dtype(res_col)) + self.assertTrue(is_categorical_dtype(res_col)) # single value res_val = df.loc["j", "cats"] @@ -3275,7 +3280,7 @@ def test_slicing_and_getting_ops(self): # res_df = df.ix["j":"k",[0,1]] # doesn't work? 
res_df = df.ix["j":"k", :] tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(com.is_categorical_dtype(res_df["cats"])) + self.assertTrue(is_categorical_dtype(res_df["cats"])) # row res_row = df.ix["j", :] @@ -3285,7 +3290,7 @@ def test_slicing_and_getting_ops(self): # col res_col = df.ix[:, "cats"] tm.assert_series_equal(res_col, exp_col) - self.assertTrue(com.is_categorical_dtype(res_col)) + self.assertTrue(is_categorical_dtype(res_col)) # single value res_val = df.ix["j", 0] @@ -3318,23 +3323,23 @@ def test_slicing_and_getting_ops(self): res_df = df.iloc[slice(2, 4)] tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(com.is_categorical_dtype(res_df["cats"])) + self.assertTrue(is_categorical_dtype(res_df["cats"])) res_df = df.iloc[[2, 3]] tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(com.is_categorical_dtype(res_df["cats"])) + self.assertTrue(is_categorical_dtype(res_df["cats"])) res_col = df.iloc[:, 0] tm.assert_series_equal(res_col, exp_col) - self.assertTrue(com.is_categorical_dtype(res_col)) + self.assertTrue(is_categorical_dtype(res_col)) res_df = df.iloc[:, slice(0, 2)] tm.assert_frame_equal(res_df, df) - self.assertTrue(com.is_categorical_dtype(res_df["cats"])) + self.assertTrue(is_categorical_dtype(res_df["cats"])) res_df = df.iloc[:, [0, 1]] tm.assert_frame_equal(res_df, df) - self.assertTrue(com.is_categorical_dtype(res_df["cats"])) + self.assertTrue(is_categorical_dtype(res_df["cats"])) def test_slicing_doc_examples(self): @@ -4114,7 +4119,7 @@ def test_astype_to_other(self): s = self.cat['value_group'] expected = s tm.assert_series_equal(s.astype('category'), expected) - tm.assert_series_equal(s.astype(com.CategoricalDtype()), expected) + tm.assert_series_equal(s.astype(CategoricalDtype()), expected) self.assertRaises(ValueError, lambda: s.astype('float64')) cat = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])) @@ -4139,10 +4144,10 @@ def cmp(a, b): # valid conversion for valid in [lambda x: x.astype('category'), - lambda x: x.astype(com.CategoricalDtype()), + lambda x: x.astype(CategoricalDtype()), lambda x: x.astype('object').astype('category'), lambda x: x.astype('object').astype( - com.CategoricalDtype()) + CategoricalDtype()) ]: result = valid(s) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 56b1b542d547e..09dd3f7ab517c 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,21 +1,12 @@ # -*- coding: utf-8 -*- -import collections -from datetime import datetime, timedelta -import re import nose import numpy as np -import pandas as pd -from pandas.tslib import iNaT, NaT -from pandas import (Series, DataFrame, date_range, DatetimeIndex, - TimedeltaIndex, Timestamp, Float64Index) -from pandas import compat -from pandas.compat import range, lrange, lmap, u -from pandas.core.common import notnull, isnull, array_equivalent + +from pandas import Series, Timestamp +from pandas.compat import range, lmap import pandas.core.common as com -import pandas.core.convert as convert import pandas.util.testing as tm -import pandas.core.config as cf _multiprocess_can_split_ = True @@ -28,22 +19,6 @@ def test_mut_exclusive(): assert com._mut_exclusive(major=None, major_axis=None) is None -def test_is_sequence(): - is_seq = com.is_sequence - assert (is_seq((1, 2))) - assert (is_seq([1, 2])) - assert (not is_seq("abcd")) - assert (not is_seq(u("abcd"))) - assert (not is_seq(np.int64)) - - class A(object): - - def __getitem__(self): - return 1 - - assert (not is_seq(A())) - - def test_get_callable_name(): 
from functools import partial getname = com._get_callable_name @@ -68,407 +43,6 @@ def __call__(self): assert getname(1) is None -class TestInferDtype(tm.TestCase): - - def test_infer_dtype_from_scalar(self): - # Test that _infer_dtype_from_scalar is returning correct dtype for int - # and float. - - for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, - np.int32, np.uint64, np.int64]: - data = dtypec(12) - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, type(data)) - - data = 12 - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.int64) - - for dtypec in [np.float16, np.float32, np.float64]: - data = dtypec(12) - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, dtypec) - - data = np.float(12) - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.float64) - - for data in [True, False]: - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.bool_) - - for data in [np.complex64(1), np.complex128(1)]: - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.complex_) - - import datetime - for data in [np.datetime64(1, 'ns'), pd.Timestamp(1), - datetime.datetime(2000, 1, 1, 0, 0)]: - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, 'M8[ns]') - - for data in [np.timedelta64(1, 'ns'), pd.Timedelta(1), - datetime.timedelta(1)]: - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, 'm8[ns]') - - for data in [datetime.date(2000, 1, 1), - pd.Timestamp(1, tz='US/Eastern'), 'foo']: - dtype, val = com._infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.object_) - - -def test_notnull(): - assert notnull(1.) - assert not notnull(None) - assert not notnull(np.NaN) - - with cf.option_context("mode.use_inf_as_null", False): - assert notnull(np.inf) - assert notnull(-np.inf) - - arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notnull(arr) - assert result.all() - - with cf.option_context("mode.use_inf_as_null", True): - assert not notnull(np.inf) - assert not notnull(-np.inf) - - arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notnull(arr) - assert result.sum() == 2 - - with cf.option_context("mode.use_inf_as_null", False): - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries(), tm.makeTimeSeries(), - tm.makePeriodSeries()]: - assert (isinstance(isnull(s), Series)) - - -def test_isnull(): - assert not isnull(1.) 
- assert isnull(None) - assert isnull(np.NaN) - assert not isnull(np.inf) - assert not isnull(-np.inf) - - # series - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries(), tm.makeTimeSeries(), - tm.makePeriodSeries()]: - assert (isinstance(isnull(s), Series)) - - # frame - for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), - tm.makeMixedDataFrame()]: - result = isnull(df) - expected = df.apply(isnull) - tm.assert_frame_equal(result, expected) - - # panel - for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel()) - ]: - result = isnull(p) - expected = p.apply(isnull) - tm.assert_panel_equal(result, expected) - - # panel 4d - for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: - result = isnull(p) - expected = p.apply(isnull) - tm.assert_panel4d_equal(result, expected) - - -def test_isnull_lists(): - result = isnull([[False]]) - exp = np.array([[False]]) - assert (np.array_equal(result, exp)) - - result = isnull([[1], [2]]) - exp = np.array([[False], [False]]) - assert (np.array_equal(result, exp)) - - # list of strings / unicode - result = isnull(['foo', 'bar']) - assert (not result.any()) - - result = isnull([u('foo'), u('bar')]) - assert (not result.any()) - - -def test_isnull_nat(): - result = isnull([NaT]) - exp = np.array([True]) - assert (np.array_equal(result, exp)) - - result = isnull(np.array([NaT], dtype=object)) - exp = np.array([True]) - assert (np.array_equal(result, exp)) - - -def test_isnull_numpy_nat(): - arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), - np.datetime64('NaT', 's')]) - result = isnull(arr) - expected = np.array([True] * 4) - tm.assert_numpy_array_equal(result, expected) - - -def test_isnull_datetime(): - assert (not isnull(datetime.now())) - assert notnull(datetime.now()) - - idx = date_range('1/1/1990', periods=20) - assert (notnull(idx).all()) - - idx = np.asarray(idx) - idx[0] = iNaT - idx = DatetimeIndex(idx) - mask = isnull(idx) - assert (mask[0]) - assert (not mask[1:].any()) - - # GH 9129 - pidx = idx.to_period(freq='M') - mask = isnull(pidx) - assert (mask[0]) - assert (not mask[1:].any()) - - mask = isnull(pidx[1:]) - assert (not mask.any()) - - -class TestIsNull(tm.TestCase): - - def test_0d_array(self): - self.assertTrue(isnull(np.array(np.nan))) - self.assertFalse(isnull(np.array(0.0))) - self.assertFalse(isnull(np.array(0))) - # test object dtype - self.assertTrue(isnull(np.array(np.nan, dtype=object))) - self.assertFalse(isnull(np.array(0.0, dtype=object))) - self.assertFalse(isnull(np.array(0, dtype=object))) - - -class TestNumberScalar(tm.TestCase): - - def test_is_number(self): - - self.assertTrue(com.is_number(True)) - self.assertTrue(com.is_number(1)) - self.assertTrue(com.is_number(1.1)) - self.assertTrue(com.is_number(1 + 3j)) - self.assertTrue(com.is_number(np.bool(False))) - self.assertTrue(com.is_number(np.int64(1))) - self.assertTrue(com.is_number(np.float64(1.1))) - self.assertTrue(com.is_number(np.complex128(1 + 3j))) - self.assertTrue(com.is_number(np.nan)) - - self.assertFalse(com.is_number(None)) - self.assertFalse(com.is_number('x')) - self.assertFalse(com.is_number(datetime(2011, 1, 1))) - self.assertFalse(com.is_number(np.datetime64('2011-01-01'))) - self.assertFalse(com.is_number(pd.Timestamp('2011-01-01'))) - self.assertFalse(com.is_number(pd.Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(com.is_number(timedelta(1000))) - self.assertFalse(com.is_number(pd.Timedelta('1 days'))) - - # questionable - 
self.assertFalse(com.is_number(np.bool_(False))) - self.assertTrue(com.is_number(np.timedelta64(1, 'D'))) - - def test_is_bool(self): - self.assertTrue(com.is_bool(True)) - self.assertTrue(com.is_bool(np.bool(False))) - self.assertTrue(com.is_bool(np.bool_(False))) - - self.assertFalse(com.is_bool(1)) - self.assertFalse(com.is_bool(1.1)) - self.assertFalse(com.is_bool(1 + 3j)) - self.assertFalse(com.is_bool(np.int64(1))) - self.assertFalse(com.is_bool(np.float64(1.1))) - self.assertFalse(com.is_bool(np.complex128(1 + 3j))) - self.assertFalse(com.is_bool(np.nan)) - self.assertFalse(com.is_bool(None)) - self.assertFalse(com.is_bool('x')) - self.assertFalse(com.is_bool(datetime(2011, 1, 1))) - self.assertFalse(com.is_bool(np.datetime64('2011-01-01'))) - self.assertFalse(com.is_bool(pd.Timestamp('2011-01-01'))) - self.assertFalse(com.is_bool(pd.Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(com.is_bool(timedelta(1000))) - self.assertFalse(com.is_bool(np.timedelta64(1, 'D'))) - self.assertFalse(com.is_bool(pd.Timedelta('1 days'))) - - def test_is_integer(self): - self.assertTrue(com.is_integer(1)) - self.assertTrue(com.is_integer(np.int64(1))) - - self.assertFalse(com.is_integer(True)) - self.assertFalse(com.is_integer(1.1)) - self.assertFalse(com.is_integer(1 + 3j)) - self.assertFalse(com.is_integer(np.bool(False))) - self.assertFalse(com.is_integer(np.bool_(False))) - self.assertFalse(com.is_integer(np.float64(1.1))) - self.assertFalse(com.is_integer(np.complex128(1 + 3j))) - self.assertFalse(com.is_integer(np.nan)) - self.assertFalse(com.is_integer(None)) - self.assertFalse(com.is_integer('x')) - self.assertFalse(com.is_integer(datetime(2011, 1, 1))) - self.assertFalse(com.is_integer(np.datetime64('2011-01-01'))) - self.assertFalse(com.is_integer(pd.Timestamp('2011-01-01'))) - self.assertFalse(com.is_integer(pd.Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(com.is_integer(timedelta(1000))) - self.assertFalse(com.is_integer(pd.Timedelta('1 days'))) - - # questionable - self.assertTrue(com.is_integer(np.timedelta64(1, 'D'))) - - def test_is_float(self): - self.assertTrue(com.is_float(1.1)) - self.assertTrue(com.is_float(np.float64(1.1))) - self.assertTrue(com.is_float(np.nan)) - - self.assertFalse(com.is_float(True)) - self.assertFalse(com.is_float(1)) - self.assertFalse(com.is_float(1 + 3j)) - self.assertFalse(com.is_float(np.bool(False))) - self.assertFalse(com.is_float(np.bool_(False))) - self.assertFalse(com.is_float(np.int64(1))) - self.assertFalse(com.is_float(np.complex128(1 + 3j))) - self.assertFalse(com.is_float(None)) - self.assertFalse(com.is_float('x')) - self.assertFalse(com.is_float(datetime(2011, 1, 1))) - self.assertFalse(com.is_float(np.datetime64('2011-01-01'))) - self.assertFalse(com.is_float(pd.Timestamp('2011-01-01'))) - self.assertFalse(com.is_float(pd.Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(com.is_float(timedelta(1000))) - self.assertFalse(com.is_float(np.timedelta64(1, 'D'))) - self.assertFalse(com.is_float(pd.Timedelta('1 days'))) - - -def test_downcast_conv(): - # test downcasting - - arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) - result = com._possibly_downcast_to_dtype(arr, 'infer') - assert (np.array_equal(result, arr)) - - arr = np.array([8., 8., 8., 8., 8.9999999999995]) - result = com._possibly_downcast_to_dtype(arr, 'infer') - expected = np.array([8, 8, 8, 8, 9]) - assert (np.array_equal(result, expected)) - - arr = np.array([8., 8., 8., 8., 9.0000000000005]) - result = 
com._possibly_downcast_to_dtype(arr, 'infer') - expected = np.array([8, 8, 8, 8, 9]) - assert (np.array_equal(result, expected)) - - # conversions - - expected = np.array([1, 2]) - for dtype in [np.float64, object, np.int64]: - arr = np.array([1.0, 2.0], dtype=dtype) - result = com._possibly_downcast_to_dtype(arr, 'infer') - tm.assert_almost_equal(result, expected, check_dtype=False) - - for dtype in [np.float64, object]: - expected = np.array([1.0, 2.0, np.nan], dtype=dtype) - arr = np.array([1.0, 2.0, np.nan], dtype=dtype) - result = com._possibly_downcast_to_dtype(arr, 'infer') - tm.assert_almost_equal(result, expected) - - # empties - for dtype in [np.int32, np.float64, np.float32, np.bool_, - np.int64, object]: - arr = np.array([], dtype=dtype) - result = com._possibly_downcast_to_dtype(arr, 'int64') - tm.assert_almost_equal(result, np.array([], dtype=np.int64)) - assert result.dtype == np.int64 - - -def test_array_equivalent(): - assert array_equivalent(np.array([np.nan, np.nan]), - np.array([np.nan, np.nan])) - assert array_equivalent(np.array([np.nan, 1, np.nan]), - np.array([np.nan, 1, np.nan])) - assert array_equivalent(np.array([np.nan, None], dtype='object'), - np.array([np.nan, None], dtype='object')) - assert array_equivalent(np.array([np.nan, 1 + 1j], dtype='complex'), - np.array([np.nan, 1 + 1j], dtype='complex')) - assert not array_equivalent( - np.array([np.nan, 1 + 1j], dtype='complex'), np.array( - [np.nan, 1 + 2j], dtype='complex')) - assert not array_equivalent( - np.array([np.nan, 1, np.nan]), np.array([np.nan, 2, np.nan])) - assert not array_equivalent( - np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e'])) - assert array_equivalent(Float64Index([0, np.nan]), - Float64Index([0, np.nan])) - assert not array_equivalent( - Float64Index([0, np.nan]), Float64Index([1, np.nan])) - assert array_equivalent(DatetimeIndex([0, np.nan]), - DatetimeIndex([0, np.nan])) - assert not array_equivalent( - DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan])) - assert array_equivalent(TimedeltaIndex([0, np.nan]), - TimedeltaIndex([0, np.nan])) - assert not array_equivalent( - TimedeltaIndex([0, np.nan]), TimedeltaIndex([1, np.nan])) - assert array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'), - DatetimeIndex([0, np.nan], tz='US/Eastern')) - assert not array_equivalent( - DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex( - [1, np.nan], tz='US/Eastern')) - assert not array_equivalent( - DatetimeIndex([0, np.nan]), DatetimeIndex( - [0, np.nan], tz='US/Eastern')) - assert not array_equivalent( - DatetimeIndex([0, np.nan], tz='CET'), DatetimeIndex( - [0, np.nan], tz='US/Eastern')) - assert not array_equivalent( - DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) - - -def test_array_equivalent_str(): - for dtype in ['O', 'S', 'U']: - assert array_equivalent(np.array(['A', 'B'], dtype=dtype), - np.array(['A', 'B'], dtype=dtype)) - assert not array_equivalent(np.array(['A', 'B'], dtype=dtype), - np.array(['A', 'X'], dtype=dtype)) - - -def test_datetimeindex_from_empty_datetime64_array(): - for unit in ['ms', 'us', 'ns']: - idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) - assert (len(idx) == 0) - - -def test_nan_to_nat_conversions(): - - df = DataFrame(dict({ - 'A': np.asarray( - lrange(10), dtype='float64'), - 'B': Timestamp('20010101') - })) - df.iloc[3:6, :] = np.nan - result = df.loc[4, 'B'].value - assert (result == iNaT) - - s = df['B'].copy() - s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) - assert (isnull(s[8])) - 
- # numpy < 1.7.0 is wrong - from distutils.version import LooseVersion - if LooseVersion(np.__version__) >= '1.7.0': - assert (s[8].value == np.datetime64('NaT').astype(np.int64)) - - def test_any_none(): assert (com._any_none(1, 2, 3, None)) assert (not com._any_none(1, 2, 3, 4)) @@ -567,122 +141,6 @@ def test_groupby(): assert v == expected[k] -def test_is_list_like(): - passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), - Series([]), Series(['a']).str) - fails = (1, '2', object()) - - for p in passes: - assert com.is_list_like(p) - - for f in fails: - assert not com.is_list_like(f) - - -def test_is_dict_like(): - passes = [{}, {'A': 1}, pd.Series([1])] - fails = ['1', 1, [1, 2], (1, 2), range(2), pd.Index([1])] - - for p in passes: - assert com.is_dict_like(p) - - for f in fails: - assert not com.is_dict_like(f) - - -def test_is_named_tuple(): - passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), ) - fails = ((1, 2, 3), 'a', Series({'pi': 3.14})) - - for p in passes: - assert com.is_named_tuple(p) - - for f in fails: - assert not com.is_named_tuple(f) - - -def test_is_hashable(): - - # all new-style classes are hashable by default - class HashableClass(object): - pass - - class UnhashableClass1(object): - __hash__ = None - - class UnhashableClass2(object): - - def __hash__(self): - raise TypeError("Not hashable") - - hashable = (1, - 3.14, - np.float64(3.14), - 'a', - tuple(), - (1, ), - HashableClass(), ) - not_hashable = ([], UnhashableClass1(), ) - abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), ) - - for i in hashable: - assert com.is_hashable(i) - for i in not_hashable: - assert not com.is_hashable(i) - for i in abc_hashable_not_really_hashable: - assert not com.is_hashable(i) - - # numpy.array is no longer collections.Hashable as of - # https://github.com/numpy/numpy/pull/5326, just test - # pandas.common.is_hashable() - assert not com.is_hashable(np.array([])) - - # old-style classes in Python 2 don't appear hashable to - # collections.Hashable but also seem to support hash() by default - if compat.PY2: - - class OldStyleClass(): - pass - - c = OldStyleClass() - assert not isinstance(c, collections.Hashable) - assert com.is_hashable(c) - hash(c) # this will not raise - - -def test_ensure_int32(): - values = np.arange(10, dtype=np.int32) - result = com._ensure_int32(values) - assert (result.dtype == np.int32) - - values = np.arange(10, dtype=np.int64) - result = com._ensure_int32(values) - assert (result.dtype == np.int32) - - -def test_is_re(): - passes = re.compile('ad'), - fails = 'x', 2, 3, object() - - for p in passes: - assert com.is_re(p) - - for f in fails: - assert not com.is_re(f) - - -def test_is_recompilable(): - passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'), - re.compile(r'')) - fails = 1, [], object() - - for p in passes: - assert com.is_re_compilable(p) - - for f in fails: - assert not com.is_re_compilable(f) - - def test_random_state(): import numpy.random as npr # Check with seed @@ -730,83 +188,6 @@ def test_maybe_match_name(): assert (matched == 'y') -class TestMaybe(tm.TestCase): - - def test_maybe_convert_string_to_array(self): - result = com._maybe_convert_string_to_object('x') - tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object)) - self.assertTrue(result.dtype == object) - - result = com._maybe_convert_string_to_object(1) - self.assertEqual(result, 1) - - arr = np.array(['x', 'y'], dtype=str) - result = com._maybe_convert_string_to_object(arr) - 
tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) - self.assertTrue(result.dtype == object) - - # unicode - arr = np.array(['x', 'y']).astype('U') - result = com._maybe_convert_string_to_object(arr) - tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) - self.assertTrue(result.dtype == object) - - # object - arr = np.array(['x', 2], dtype=object) - result = com._maybe_convert_string_to_object(arr) - tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object)) - self.assertTrue(result.dtype == object) - - def test_maybe_convert_scalar(self): - - # pass thru - result = com._maybe_convert_scalar('x') - self.assertEqual(result, 'x') - result = com._maybe_convert_scalar(np.array([1])) - self.assertEqual(result, np.array([1])) - - # leave scalar dtype - result = com._maybe_convert_scalar(np.int64(1)) - self.assertEqual(result, np.int64(1)) - result = com._maybe_convert_scalar(np.int32(1)) - self.assertEqual(result, np.int32(1)) - result = com._maybe_convert_scalar(np.float32(1)) - self.assertEqual(result, np.float32(1)) - result = com._maybe_convert_scalar(np.int64(1)) - self.assertEqual(result, np.float64(1)) - - # coerce - result = com._maybe_convert_scalar(1) - self.assertEqual(result, np.int64(1)) - result = com._maybe_convert_scalar(1.0) - self.assertEqual(result, np.float64(1)) - result = com._maybe_convert_scalar(pd.Timestamp('20130101')) - self.assertEqual(result, pd.Timestamp('20130101').value) - result = com._maybe_convert_scalar(datetime(2013, 1, 1)) - self.assertEqual(result, pd.Timestamp('20130101').value) - result = com._maybe_convert_scalar(pd.Timedelta('1 day 1 min')) - self.assertEqual(result, pd.Timedelta('1 day 1 min').value) - - -class TestConvert(tm.TestCase): - - def test_possibly_convert_objects_copy(self): - values = np.array([1, 2]) - - out = convert._possibly_convert_objects(values, copy=False) - self.assertTrue(values is out) - - out = convert._possibly_convert_objects(values, copy=True) - self.assertTrue(values is not out) - - values = np.array(['apply', 'banana']) - out = convert._possibly_convert_objects(values, copy=False) - self.assertTrue(values is out) - - out = convert._possibly_convert_objects(values, copy=True) - self.assertTrue(values is not out) - - def test_dict_compat(): data_datetime64 = {np.datetime64('1990-03-15'): 1, np.datetime64('2015-03-15'): 2} @@ -817,39 +198,6 @@ def test_dict_compat(): assert (com._dict_compat(data_unchanged) == data_unchanged) -def test_is_timedelta(): - assert (com.is_timedelta64_dtype('timedelta64')) - assert (com.is_timedelta64_dtype('timedelta64[ns]')) - assert (not com.is_timedelta64_ns_dtype('timedelta64')) - assert (com.is_timedelta64_ns_dtype('timedelta64[ns]')) - - tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') - assert (com.is_timedelta64_dtype(tdi)) - assert (com.is_timedelta64_ns_dtype(tdi)) - assert (com.is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) - # Conversion to Int64Index: - assert (not com.is_timedelta64_ns_dtype(tdi.astype('timedelta64'))) - assert (not com.is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]'))) - - -def test_array_equivalent_compat(): - # see gh-13388 - m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) - n = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) - assert (com.array_equivalent(m, n, strict_nan=True)) - assert (com.array_equivalent(m, n, strict_nan=False)) - - m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) - n = np.array([(1, 2), (4, 3)], dtype=[('a', int), ('b', 
float)]) - assert (not com.array_equivalent(m, n, strict_nan=True)) - assert (not com.array_equivalent(m, n, strict_nan=False)) - - m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) - n = np.array([(1, 2), (3, 4)], dtype=[('b', int), ('a', float)]) - assert (not com.array_equivalent(m, n, strict_nan=True)) - assert (not com.array_equivalent(m, n, strict_nan=False)) - - if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 2f4c2b414cc30..a53e79439b017 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -7,12 +7,12 @@ from numpy import nan import pandas as pd +from pandas.types.common import is_scalar from pandas import (Index, Series, DataFrame, Panel, isnull, date_range, period_range, Panel4D) from pandas.core.index import MultiIndex import pandas.formats.printing as printing -import pandas.lib as lib from pandas.compat import range, zip, PY3 from pandas import compat @@ -53,7 +53,7 @@ def _construct(self, shape, value=None, dtype=None, **kwargs): if isinstance(shape, int): shape = tuple([shape] * self._ndim) if value is not None: - if lib.isscalar(value): + if is_scalar(value): if value == 'empty': arr = None diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index bd19a83ce2b64..3a5b0117948b7 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -9,6 +9,7 @@ from datetime import datetime, date +from pandas.types.common import is_list_like import pandas as pd from pandas import (Series, DataFrame, MultiIndex, PeriodIndex, date_range, bdate_range) @@ -16,7 +17,6 @@ iteritems, OrderedDict, PY3) from pandas.util.decorators import cache_readonly from pandas.formats.printing import pprint_thing -import pandas.core.common as com import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, assert_is_valid_plot_return_object, slow) @@ -157,7 +157,7 @@ def _check_visible(self, collections, visible=True): """ from matplotlib.collections import Collection if not isinstance(collections, - Collection) and not com.is_list_like(collections): + Collection) and not is_list_like(collections): collections = [collections] for patch in collections: @@ -242,7 +242,7 @@ def _check_text_labels(self, texts, expected): expected : str or list-like which has the same length as texts expected text label, or its list """ - if not com.is_list_like(texts): + if not is_list_like(texts): self.assertEqual(texts.get_text(), expected) else: labels = [t.get_text() for t in texts] diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index a52f22fe2032a..57d43f22757ea 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -5,7 +5,8 @@ from datetime import datetime from numpy import nan -from pandas import date_range, bdate_range, Timestamp +from pandas.types.common import _ensure_platform_int +from pandas import date_range, bdate_range, Timestamp, isnull from pandas.core.index import Index, MultiIndex, CategoricalIndex from pandas.core.api import Categorical, DataFrame from pandas.core.common import UnsupportedFunctionCall @@ -163,9 +164,9 @@ def test_first_last_nth(self): grouped['B'].nth(0) self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan - self.assertTrue(com.isnull(grouped['B'].first()['foo'])) - self.assertTrue(com.isnull(grouped['B'].last()['foo'])) - self.assertTrue(com.isnull(grouped['B'].nth(0)['foo'])) + 
self.assertTrue(isnull(grouped['B'].first()['foo'])) + self.assertTrue(isnull(grouped['B'].last()['foo'])) + self.assertTrue(isnull(grouped['B'].nth(0)['foo'])) # v0.14.0 whatsnew df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) @@ -1079,8 +1080,9 @@ def test_transform_fast(self): grp = df.groupby('id')['val'] values = np.repeat(grp.mean().values, - com._ensure_platform_int(grp.count().values)) + _ensure_platform_int(grp.count().values)) expected = pd.Series(values, index=df.index, name='val') + result = grp.transform(np.mean) assert_series_equal(result, expected) diff --git a/pandas/tests/test_infer_and_convert.py b/pandas/tests/test_infer_and_convert.py deleted file mode 100644 index 5f016322f101f..0000000000000 --- a/pandas/tests/test_infer_and_convert.py +++ /dev/null @@ -1,653 +0,0 @@ -# -*- coding: utf-8 -*- - -from datetime import datetime, timedelta, date, time - -import numpy as np -import pandas as pd -import pandas.lib as lib -import pandas.util.testing as tm -from pandas import Index - -from pandas.compat import long, u, PY2 - - -class TestInference(tm.TestCase): - - def test_infer_dtype_bytes(self): - compare = 'string' if PY2 else 'bytes' - - # string array of bytes - arr = np.array(list('abc'), dtype='S1') - self.assertEqual(pd.lib.infer_dtype(arr), compare) - - # object array of bytes - arr = arr.astype(object) - self.assertEqual(pd.lib.infer_dtype(arr), compare) - - def test_isinf_scalar(self): - # GH 11352 - self.assertTrue(lib.isposinf_scalar(float('inf'))) - self.assertTrue(lib.isposinf_scalar(np.inf)) - self.assertFalse(lib.isposinf_scalar(-np.inf)) - self.assertFalse(lib.isposinf_scalar(1)) - self.assertFalse(lib.isposinf_scalar('a')) - - self.assertTrue(lib.isneginf_scalar(float('-inf'))) - self.assertTrue(lib.isneginf_scalar(-np.inf)) - self.assertFalse(lib.isneginf_scalar(np.inf)) - self.assertFalse(lib.isneginf_scalar(1)) - self.assertFalse(lib.isneginf_scalar('a')) - - def test_maybe_convert_numeric_infinities(self): - # see gh-13274 - infinities = ['inf', 'inF', 'iNf', 'Inf', - 'iNF', 'InF', 'INf', 'INF'] - na_values = set(['', 'NULL', 'nan']) - - pos = np.array(['inf'], dtype=np.float64) - neg = np.array(['-inf'], dtype=np.float64) - - msg = "Unable to parse string" - - for infinity in infinities: - for maybe_int in (True, False): - out = lib.maybe_convert_numeric( - np.array([infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(['-' + infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, neg) - - out = lib.maybe_convert_numeric( - np.array([u(infinity)], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(['+' + infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - # too many characters - with tm.assertRaisesRegexp(ValueError, msg): - lib.maybe_convert_numeric( - np.array(['foo_' + infinity], dtype=object), - na_values, maybe_int) - - def test_maybe_convert_numeric_post_floatify_nan(self): - # see gh-13314 - data = np.array(['1.200', '-999.000', '4.500'], dtype=object) - expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) - nan_values = set([-999, -999.0]) - - for coerce_type in (True, False): - out = lib.maybe_convert_numeric(data, nan_values, coerce_type) - tm.assert_numpy_array_equal(out, expected) - - def test_convert_infs(self): - arr = np.array(['inf', 'inf', 'inf'], dtype='O') - result = 
lib.maybe_convert_numeric(arr, set(), False) - self.assertTrue(result.dtype == np.float64) - - arr = np.array(['-inf', '-inf', '-inf'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False) - self.assertTrue(result.dtype == np.float64) - - def test_scientific_no_exponent(self): - # See PR 12215 - arr = np.array(['42E', '2E', '99e', '6e'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False, True) - self.assertTrue(np.all(np.isnan(result))) - - def test_convert_non_hashable(self): - # GH13324 - # make sure that we are handing non-hashables - arr = np.array([[10.0, 2], 1.0, 'apple']) - result = lib.maybe_convert_numeric(arr, set(), False, True) - tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) - - -class TestTypeInference(tm.TestCase): - _multiprocess_can_split_ = True - - def test_length_zero(self): - result = lib.infer_dtype(np.array([], dtype='i4')) - self.assertEqual(result, 'integer') - - result = lib.infer_dtype([]) - self.assertEqual(result, 'empty') - - def test_integers(self): - arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') - - arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') - - arr = np.array([1, 2, 3, 4, 5], dtype='i4') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') - - def test_bools(self): - arr = np.array([True, False, True, True, True], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - arr = np.array([np.bool_(True), np.bool_(False)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - arr = np.array([True, False, True, 'foo'], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') - - arr = np.array([True, False, True], dtype=bool) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - def test_floats(self): - arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'], - dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') - - arr = np.array([1, 2, 3, 4, 5], dtype='f4') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - arr = np.array([1, 2, 3, 4, 5], dtype='f8') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - def test_string(self): - pass - - def test_unicode(self): - pass - - def test_datetime(self): - - dates = [datetime(2012, 1, x) for x in range(1, 20)] - index = Index(dates) - self.assertEqual(index.inferred_type, 'datetime64') - - def test_infer_dtype_datetime(self): - - arr = np.array([pd.Timestamp('2011-01-01'), - pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([np.datetime64('2011-01-01'), - np.datetime64('2011-01-01')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, np.datetime64('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([n, datetime(2011, 1, 1)]) - 
self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, pd.Timestamp('2011-01-02'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, np.datetime64('2011-01-02'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([n, datetime(2011, 1, 1), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - # different type of nat - arr = np.array([np.timedelta64('nat'), - np.datetime64('2011-01-02')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.datetime64('2011-01-02'), - np.timedelta64('nat')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - # mixed datetime - arr = np.array([datetime(2011, 1, 1), - pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - # should be datetime? - arr = np.array([np.datetime64('2011-01-01'), - pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([pd.Timestamp('2011-01-02'), - np.datetime64('2011-01-01')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed-integer') - - arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - def test_infer_dtype_timedelta(self): - - arr = np.array([pd.Timedelta('1 days'), - pd.Timedelta('2 days')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([np.timedelta64(1, 'D'), - np.timedelta64(2, 'D')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([timedelta(1), timedelta(2)]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Timedelta('1 days')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, np.timedelta64(1, 'D')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, timedelta(1)]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, pd.Timedelta('1 days'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, np.timedelta64(1, 'D'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, timedelta(1), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - # different type of nat - arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')], - dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')], - dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - def test_infer_dtype_all_nan_nat_like(self): - arr = np.array([np.nan, np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'floating') - - # nan and None mix are result in mixed - arr = np.array([np.nan, np.nan, None]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([None, np.nan, np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - # pd.NaT - arr = np.array([pd.NaT]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([pd.NaT, np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([np.nan, pd.NaT]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = 
np.array([np.nan, pd.NaT, np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([None, pd.NaT, None]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - # np.datetime64(nat) - arr = np.array([np.datetime64('nat')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - for n in [np.nan, pd.NaT, None]: - arr = np.array([n, np.datetime64('nat'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([pd.NaT, n, np.datetime64('nat'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([np.timedelta64('nat')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - for n in [np.nan, pd.NaT, None]: - arr = np.array([n, np.timedelta64('nat'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([pd.NaT, n, np.timedelta64('nat'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - # datetime / timedelta mixed - arr = np.array([pd.NaT, np.datetime64('nat'), - np.timedelta64('nat'), np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.timedelta64('nat'), np.datetime64('nat')], - dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - def test_is_datetimelike_array_all_nan_nat_like(self): - arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) - self.assertTrue(pd.lib.is_datetime_array(arr)) - self.assertTrue(pd.lib.is_datetime64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_array(arr)) - self.assertFalse(pd.lib.is_timedelta64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) - self.assertFalse(pd.lib.is_datetime_array(arr)) - self.assertFalse(pd.lib.is_datetime64_array(arr)) - self.assertTrue(pd.lib.is_timedelta_array(arr)) - self.assertTrue(pd.lib.is_timedelta64_array(arr)) - self.assertTrue(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), - np.timedelta64('nat')]) - self.assertFalse(pd.lib.is_datetime_array(arr)) - self.assertFalse(pd.lib.is_datetime64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_array(arr)) - self.assertFalse(pd.lib.is_timedelta64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT]) - self.assertTrue(pd.lib.is_datetime_array(arr)) - self.assertTrue(pd.lib.is_datetime64_array(arr)) - self.assertTrue(pd.lib.is_timedelta_array(arr)) - self.assertTrue(pd.lib.is_timedelta64_array(arr)) - self.assertTrue(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, np.nan], dtype=object) - self.assertFalse(pd.lib.is_datetime_array(arr)) - self.assertFalse(pd.lib.is_datetime64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_array(arr)) - self.assertFalse(pd.lib.is_timedelta64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - def test_date(self): - - dates = [date(2012, 1, x) for x in range(1, 20)] - index = Index(dates) - self.assertEqual(index.inferred_type, 'date') - - def test_to_object_array_tuples(self): - r = (5, 6) - values = [r] - result = lib.to_object_array_tuples(values) - - try: - # make sure record array works - from collections import namedtuple - record = namedtuple('record', 'x y') - r = record(5, 6) - values = [r] - result = lib.to_object_array_tuples(values) # noqa - except ImportError: - pass - - def test_to_object_array_width(self): - # see gh-13320 - rows = [[1, 2, 3], [4, 5, 6]] - - expected = 
np.array(rows, dtype=object) - out = lib.to_object_array(rows) - tm.assert_numpy_array_equal(out, expected) - - expected = np.array(rows, dtype=object) - out = lib.to_object_array(rows, min_width=1) - tm.assert_numpy_array_equal(out, expected) - - expected = np.array([[1, 2, 3, None, None], - [4, 5, 6, None, None]], dtype=object) - out = lib.to_object_array(rows, min_width=5) - tm.assert_numpy_array_equal(out, expected) - - def test_object(self): - - # GH 7431 - # cannot infer more than this as only a single element - arr = np.array([None], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') - - def test_categorical(self): - - # GH 8974 - from pandas import Categorical, Series - arr = Categorical(list('abc')) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') - - result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') - - arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') - - result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') - - def test_is_period(self): - self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) - self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) - self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) - self.assertFalse(lib.is_period(1)) - self.assertFalse(lib.is_period(np.nan)) - - -class TestConvert(tm.TestCase): - - def test_convert_objects(self): - arr = np.array(['a', 'b', np.nan, np.nan, 'd', 'e', 'f'], dtype='O') - result = lib.maybe_convert_objects(arr) - self.assertTrue(result.dtype == np.object_) - - def test_convert_objects_ints(self): - # test that we can detect many kinds of integers - dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8'] - - for dtype_str in dtypes: - arr = np.array(list(np.arange(20, dtype=dtype_str)), dtype='O') - self.assertTrue(arr[0].dtype == np.dtype(dtype_str)) - result = lib.maybe_convert_objects(arr) - self.assertTrue(issubclass(result.dtype.type, np.integer)) - - def test_convert_objects_complex_number(self): - for dtype in np.sctypes['complex']: - arr = np.array(list(1j * np.arange(20, dtype=dtype)), dtype='O') - self.assertTrue(arr[0].dtype == np.dtype(dtype)) - result = lib.maybe_convert_objects(arr) - self.assertTrue(issubclass(result.dtype.type, np.complexfloating)) - - -class Testisscalar(tm.TestCase): - - def test_isscalar_builtin_scalars(self): - self.assertTrue(lib.isscalar(None)) - self.assertTrue(lib.isscalar(True)) - self.assertTrue(lib.isscalar(False)) - self.assertTrue(lib.isscalar(0.)) - self.assertTrue(lib.isscalar(np.nan)) - self.assertTrue(lib.isscalar('foobar')) - self.assertTrue(lib.isscalar(b'foobar')) - self.assertTrue(lib.isscalar(u('efoobar'))) - self.assertTrue(lib.isscalar(datetime(2014, 1, 1))) - self.assertTrue(lib.isscalar(date(2014, 1, 1))) - self.assertTrue(lib.isscalar(time(12, 0))) - self.assertTrue(lib.isscalar(timedelta(hours=1))) - self.assertTrue(lib.isscalar(pd.NaT)) - - def test_isscalar_builtin_nonscalars(self): - self.assertFalse(lib.isscalar({})) - self.assertFalse(lib.isscalar([])) - self.assertFalse(lib.isscalar([1])) - self.assertFalse(lib.isscalar(())) - self.assertFalse(lib.isscalar((1, ))) - self.assertFalse(lib.isscalar(slice(None))) - self.assertFalse(lib.isscalar(Ellipsis)) - - def test_isscalar_numpy_array_scalars(self): - self.assertTrue(lib.isscalar(np.int64(1))) - self.assertTrue(lib.isscalar(np.float64(1.))) - self.assertTrue(lib.isscalar(np.int32(1))) 
- self.assertTrue(lib.isscalar(np.object_('foobar'))) - self.assertTrue(lib.isscalar(np.str_('foobar'))) - self.assertTrue(lib.isscalar(np.unicode_(u('foobar')))) - self.assertTrue(lib.isscalar(np.bytes_(b'foobar'))) - self.assertTrue(lib.isscalar(np.datetime64('2014-01-01'))) - self.assertTrue(lib.isscalar(np.timedelta64(1, 'h'))) - - def test_isscalar_numpy_zerodim_arrays(self): - for zerodim in [np.array(1), np.array('foobar'), - np.array(np.datetime64('2014-01-01')), - np.array(np.timedelta64(1, 'h')), - np.array(np.datetime64('NaT'))]: - self.assertFalse(lib.isscalar(zerodim)) - self.assertTrue(lib.isscalar(lib.item_from_zerodim(zerodim))) - - def test_isscalar_numpy_arrays(self): - self.assertFalse(lib.isscalar(np.array([]))) - self.assertFalse(lib.isscalar(np.array([[]]))) - self.assertFalse(lib.isscalar(np.matrix('1; 2'))) - - def test_isscalar_pandas_scalars(self): - self.assertTrue(lib.isscalar(pd.Timestamp('2014-01-01'))) - self.assertTrue(lib.isscalar(pd.Timedelta(hours=1))) - self.assertTrue(lib.isscalar(pd.Period('2014-01-01'))) - - def test_lisscalar_pandas_containers(self): - self.assertFalse(lib.isscalar(pd.Series())) - self.assertFalse(lib.isscalar(pd.Series([1]))) - self.assertFalse(lib.isscalar(pd.DataFrame())) - self.assertFalse(lib.isscalar(pd.DataFrame([[1]]))) - self.assertFalse(lib.isscalar(pd.Panel())) - self.assertFalse(lib.isscalar(pd.Panel([[[1]]]))) - self.assertFalse(lib.isscalar(pd.Index([]))) - self.assertFalse(lib.isscalar(pd.Index([1]))) - - -class TestParseSQL(tm.TestCase): - - def test_convert_sql_column_floats(self): - arr = np.array([1.5, None, 3, 4.2], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_strings(self): - arr = np.array(['1.5', None, '3', '4.2'], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_unicode(self): - arr = np.array([u('1.5'), None, u('3'), u('4.2')], - dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], - dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_ints(self): - arr = np.array([1, 2, 3, 4], dtype='O') - arr2 = np.array([1, 2, 3, 4], dtype='i4').astype('O') - result = lib.convert_sql_column(arr) - result2 = lib.convert_sql_column(arr2) - expected = np.array([1, 2, 3, 4], dtype='i8') - self.assert_numpy_array_equal(result, expected) - self.assert_numpy_array_equal(result2, expected) - - arr = np.array([1, 2, 3, None, 4], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_longs(self): - arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, 4], dtype='i8') - self.assert_numpy_array_equal(result, expected) - - arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_bools(self): - arr = np.array([True, False, True, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, True, False], dtype=bool) 
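The zero-dim cases above capture a subtlety worth spelling out: a 0-d ndarray holds a single value but is not a scalar until it is unboxed. Roughly, per the assertions being moved:

    import numpy as np
    import pandas.lib as lib

    zerodim = np.array(1)            # 0-d array, not a scalar
    assert not lib.isscalar(zerodim)
    assert lib.isscalar(lib.item_from_zerodim(zerodim))  # unboxed to np.int64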
- self.assert_numpy_array_equal(result, expected) - - arr = np.array([True, False, None, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, np.nan, False], dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_decimals(self): - from decimal import Decimal - arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_downcast_int64(self): - from pandas.parser import na_values - - arr = np.array([1, 2, 7, 8, 10], dtype=np.int64) - expected = np.array([1, 2, 7, 8, 10], dtype=np.int8) - - # default argument - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) - - result = lib.downcast_int64(arr, na_values, use_unsigned=False) - self.assert_numpy_array_equal(result, expected) - - expected = np.array([1, 2, 7, 8, 10], dtype=np.uint8) - result = lib.downcast_int64(arr, na_values, use_unsigned=True) - self.assert_numpy_array_equal(result, expected) - - # still cast to int8 despite use_unsigned=True - # because of the negative number as an element - arr = np.array([1, 2, -7, 8, 10], dtype=np.int64) - expected = np.array([1, 2, -7, 8, 10], dtype=np.int8) - result = lib.downcast_int64(arr, na_values, use_unsigned=True) - self.assert_numpy_array_equal(result, expected) - - arr = np.array([1, 2, 7, 8, 300], dtype=np.int64) - expected = np.array([1, 2, 7, 8, 300], dtype=np.int16) - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) - - int8_na = na_values[np.int8] - int64_na = na_values[np.int64] - arr = np.array([int64_na, 2, 3, 10, 15], dtype=np.int64) - expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8) - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) - - -if __name__ == '__main__': - import nose - - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 10a6bb5c75b01..84d7226f1b2f5 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -222,6 +222,7 @@ def test_duplicated_with_nas(): expected = trues + trues assert (np.array_equal(result, expected)) + if __name__ == '__main__': import nose diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 1b1db90ea713d..f3b0becccf596 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -10,6 +10,7 @@ from pandas.core.index import Index, MultiIndex from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp +from pandas.types.common import is_float_dtype, is_integer_dtype from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assertRaisesRegexp) import pandas.core.common as com @@ -787,8 +788,8 @@ def test_delevel_infer_dtype(self): df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'], index=index) deleveled = df.reset_index() - self.assertTrue(com.is_integer_dtype(deleveled['prm1'])) - self.assertTrue(com.is_float_dtype(deleveled['prm2'])) + self.assertTrue(is_integer_dtype(deleveled['prm1'])) + self.assertTrue(is_float_dtype(deleveled['prm2'])) def test_reset_index_with_drop(self): deleveled = self.ymd.reset_index(drop=True) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 
904bedde03312..eeeddc278c714 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -5,8 +5,8 @@ import warnings import numpy as np -from pandas import Series -from pandas.core.common import isnull, is_integer_dtype +from pandas import Series, isnull +from pandas.types.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index b1f09ad2685e3..f2e13867d3bf0 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -10,12 +10,13 @@ import numpy as np import pandas as pd +from pandas.types.common import is_float_dtype from pandas import Series, DataFrame, Index, isnull, notnull, pivot, MultiIndex from pandas.core.datetools import bday from pandas.core.nanops import nanall, nanany from pandas.core.panel import Panel from pandas.core.series import remove_na -import pandas.core.common as com + from pandas.formats.printing import pprint_thing from pandas import compat from pandas.compat import range, lrange, StringIO, OrderedDict, signature @@ -903,7 +904,7 @@ def test_set_value(self): self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5) res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5) - self.assertTrue(com.is_float_dtype(res3['ItemE'].values)) + self.assertTrue(is_float_dtype(res3['ItemE'].values)) with tm.assertRaisesRegexp(TypeError, "There must be an argument for each axis" " plus the value provided"): diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 607048df29faa..16a55c7ec4aeb 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -6,12 +6,12 @@ import numpy as np +from pandas.types.common import is_float_dtype from pandas import Series, Index, isnull, notnull from pandas.core.datetools import bday from pandas.core.panel import Panel from pandas.core.panel4d import Panel4D from pandas.core.series import remove_na -import pandas.core.common as com from pandas.util.testing import (assert_panel_equal, assert_panel4d_equal, @@ -595,7 +595,7 @@ def test_set_value(self): self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5) res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5) - self.assertTrue(com.is_float_dtype(res3['l4'].values)) + self.assertTrue(is_float_dtype(res3['l4'].values)) class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse, diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 67d171bb8efda..4d23bed620265 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -12,8 +12,7 @@ from pandas.compat import range, u import pandas.compat as compat -from pandas import (Index, Series, DataFrame, isnull, MultiIndex) -import pandas.core.common as com +from pandas import (Index, Series, DataFrame, isnull, MultiIndex, notnull) from pandas.util.testing import assert_series_equal import pandas.util.testing as tm @@ -1350,7 +1349,7 @@ def test_len(self): values = Series(['foo', 'fooo', 'fooooo', np.nan, 'fooooooo']) result = values.str.len() - exp = values.map(lambda x: len(x) if com.notnull(x) else NA) + exp = values.map(lambda x: len(x) if notnull(x) else NA) tm.assert_series_equal(result, exp) # mixed @@ -1368,7 +1367,7 @@ def test_len(self): 'fooooooo')]) result = values.str.len() - exp = values.map(lambda x: len(x) if com.notnull(x) else NA) + exp = values.map(lambda x: len(x) if notnull(x) else NA) tm.assert_series_equal(result, exp) def test_findall(self): diff --git 
a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py new file mode 100644 index 0000000000000..dd3f07ea8157f --- /dev/null +++ b/pandas/tests/types/test_cast.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- + +""" +These test the private routines in types/cast.py + +""" + + +import nose +from datetime import datetime +import numpy as np + +from pandas import Timedelta, Timestamp +from pandas.types.cast import (_possibly_downcast_to_dtype, + _possibly_convert_objects, + _infer_dtype_from_scalar, + _maybe_convert_string_to_object, + _maybe_convert_scalar) +from pandas.util import testing as tm + +_multiprocess_can_split_ = True + + +def test_downcast_conv(): + # test downcasting + + arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) + result = _possibly_downcast_to_dtype(arr, 'infer') + assert (np.array_equal(result, arr)) + + arr = np.array([8., 8., 8., 8., 8.9999999999995]) + result = _possibly_downcast_to_dtype(arr, 'infer') + expected = np.array([8, 8, 8, 8, 9]) + assert (np.array_equal(result, expected)) + + arr = np.array([8., 8., 8., 8., 9.0000000000005]) + result = _possibly_downcast_to_dtype(arr, 'infer') + expected = np.array([8, 8, 8, 8, 9]) + assert (np.array_equal(result, expected)) + + # conversions + + expected = np.array([1, 2]) + for dtype in [np.float64, object, np.int64]: + arr = np.array([1.0, 2.0], dtype=dtype) + result = _possibly_downcast_to_dtype(arr, 'infer') + tm.assert_almost_equal(result, expected, check_dtype=False) + + for dtype in [np.float64, object]: + expected = np.array([1.0, 2.0, np.nan], dtype=dtype) + arr = np.array([1.0, 2.0, np.nan], dtype=dtype) + result = _possibly_downcast_to_dtype(arr, 'infer') + tm.assert_almost_equal(result, expected) + + # empties + for dtype in [np.int32, np.float64, np.float32, np.bool_, + np.int64, object]: + arr = np.array([], dtype=dtype) + result = _possibly_downcast_to_dtype(arr, 'int64') + tm.assert_almost_equal(result, np.array([], dtype=np.int64)) + assert result.dtype == np.int64 + + +class TestInferDtype(tm.TestCase): + + def test_infer_dtype_from_scalar(self): + # Test that _infer_dtype_from_scalar returns the correct dtype for int + # and float.
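The `'infer'` cases above encode the rule that a float array is only downcast when the round trip is lossless (within float tolerance). Condensed from the assertions, using the same private helper:

    import numpy as np
    from pandas.types.cast import _possibly_downcast_to_dtype

    # values within tolerance of integers are downcast and rounded
    result = _possibly_downcast_to_dtype(
        np.array([8., 8., 8.9999999999995]), 'infer')
    assert np.array_equal(result, np.array([8, 8, 9]))

    # genuinely fractional values pass through unchanged
    arr = np.array([8.5, 8.6, 8.7])
    assert np.array_equal(_possibly_downcast_to_dtype(arr, 'infer'), arr)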
+ + for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, + np.int32, np.uint64, np.int64]: + data = dtypec(12) + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, type(data)) + + data = 12 + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, np.int64) + + for dtypec in [np.float16, np.float32, np.float64]: + data = dtypec(12) + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, dtypec) + + data = np.float(12) + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, np.float64) + + for data in [True, False]: + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, np.bool_) + + for data in [np.complex64(1), np.complex128(1)]: + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, np.complex_) + + import datetime + for data in [np.datetime64(1, 'ns'), Timestamp(1), + datetime.datetime(2000, 1, 1, 0, 0)]: + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, 'M8[ns]') + + for data in [np.timedelta64(1, 'ns'), Timedelta(1), + datetime.timedelta(1)]: + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, 'm8[ns]') + + for data in [datetime.date(2000, 1, 1), + Timestamp(1, tz='US/Eastern'), 'foo']: + dtype, val = _infer_dtype_from_scalar(data) + self.assertEqual(dtype, np.object_) + + +class TestMaybe(tm.TestCase): + + def test_maybe_convert_string_to_object(self): + result = _maybe_convert_string_to_object('x') + tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object)) + self.assertTrue(result.dtype == object) + + result = _maybe_convert_string_to_object(1) + self.assertEqual(result, 1) + + arr = np.array(['x', 'y'], dtype=str) + result = _maybe_convert_string_to_object(arr) + tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) + self.assertTrue(result.dtype == object) + + # unicode + arr = np.array(['x', 'y']).astype('U') + result = _maybe_convert_string_to_object(arr) + tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) + self.assertTrue(result.dtype == object) + + # object + arr = np.array(['x', 2], dtype=object) + result = _maybe_convert_string_to_object(arr) + tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object)) + self.assertTrue(result.dtype == object) + + def test_maybe_convert_scalar(self): + + # pass thru + result = _maybe_convert_scalar('x') + self.assertEqual(result, 'x') + result = _maybe_convert_scalar(np.array([1])) + self.assertEqual(result, np.array([1])) + + # leave scalar dtype + result = _maybe_convert_scalar(np.int64(1)) + self.assertEqual(result, np.int64(1)) + result = _maybe_convert_scalar(np.int32(1)) + self.assertEqual(result, np.int32(1)) + result = _maybe_convert_scalar(np.float32(1)) + self.assertEqual(result, np.float32(1)) + result = _maybe_convert_scalar(np.float64(1)) + self.assertEqual(result, np.float64(1)) + + # coerce + result = _maybe_convert_scalar(1) + self.assertEqual(result, np.int64(1)) + result = _maybe_convert_scalar(1.0) + self.assertEqual(result, np.float64(1)) + result = _maybe_convert_scalar(Timestamp('20130101')) + self.assertEqual(result, Timestamp('20130101').value) + result = _maybe_convert_scalar(datetime(2013, 1, 1)) + self.assertEqual(result, Timestamp('20130101').value) + result = _maybe_convert_scalar(Timedelta('1 day 1 min')) + self.assertEqual(result, Timedelta('1 day 1 min').value) + + +class TestConvert(tm.TestCase): + + def test_possibly_convert_objects_copy(self): + values = np.array([1, 2]) + + out = 
_possibly_convert_objects(values, copy=False) + self.assertTrue(values is out) + + out = _possibly_convert_objects(values, copy=True) + self.assertTrue(values is not out) + + values = np.array(['apply', 'banana']) + out = _possibly_convert_objects(values, copy=False) + self.assertTrue(values is out) + + out = _possibly_convert_objects(values, copy=True) + self.assertTrue(values is not out) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py new file mode 100644 index 0000000000000..0a586410ad5a0 --- /dev/null +++ b/pandas/tests/types/test_common.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +import nose +import numpy as np + +from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype +from pandas.types.common import pandas_dtype + +_multiprocess_can_split_ = True + + +def test_pandas_dtype(): + + assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype( + 'datetime64[ns, US/Eastern]') + assert pandas_dtype('category') == CategoricalDtype() + for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: + assert pandas_dtype(dtype) == np.dtype(dtype) + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py index d48b9baf64777..1743e80ae01a9 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -4,13 +4,14 @@ import nose import numpy as np from pandas import Series, Categorical, date_range -import pandas.core.common as com -from pandas.types.api import CategoricalDtype -from pandas.core.common import (is_categorical_dtype, - is_categorical, DatetimeTZDtype, - is_datetime64tz_dtype, is_datetimetz, - is_dtype_equal, is_datetime64_ns_dtype, - is_datetime64_dtype) + +from pandas.types.dtypes import CategoricalDtype +from pandas.types.common import (is_categorical_dtype, + is_categorical, DatetimeTZDtype, + is_datetime64tz_dtype, is_datetimetz, + is_dtype_equal, is_datetime64_ns_dtype, + is_datetime64_dtype, + _coerce_to_dtype) import pandas.util.testing as tm _multiprocess_can_split_ = True @@ -124,9 +125,9 @@ def test_subclass(self): self.assertTrue(issubclass(type(a), type(b))) def test_coerce_to_dtype(self): - self.assertEqual(com._coerce_to_dtype('datetime64[ns, US/Eastern]'), + self.assertEqual(_coerce_to_dtype('datetime64[ns, US/Eastern]'), DatetimeTZDtype('ns', 'US/Eastern')) - self.assertEqual(com._coerce_to_dtype('datetime64[ns, Asia/Tokyo]'), + self.assertEqual(_coerce_to_dtype('datetime64[ns, Asia/Tokyo]'), DatetimeTZDtype('ns', 'Asia/Tokyo')) def test_compat(self): diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py index 5549a3a376992..89913de6f6069 100644 --- a/pandas/tests/types/test_generic.py +++ b/pandas/tests/types/test_generic.py @@ -3,8 +3,8 @@ import nose import numpy as np import pandas as pd -import pandas.core.common as com import pandas.util.testing as tm +from pandas.types import generic as gt _multiprocess_can_split_ = True @@ -22,24 +22,24 @@ class TestABCClasses(tm.TestCase): sparse_array = pd.SparseArray(np.random.randn(10)) def test_abc_types(self): - self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndex) - self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCInt64Index) - self.assertIsInstance(pd.Float64Index([1, 2, 3]), com.ABCFloat64Index) - 
self.assertIsInstance(self.multi_index, com.ABCMultiIndex) - self.assertIsInstance(self.datetime_index, com.ABCDatetimeIndex) - self.assertIsInstance(self.timedelta_index, com.ABCTimedeltaIndex) - self.assertIsInstance(self.period_index, com.ABCPeriodIndex) + self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) + self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCInt64Index) + self.assertIsInstance(pd.Float64Index([1, 2, 3]), gt.ABCFloat64Index) + self.assertIsInstance(self.multi_index, gt.ABCMultiIndex) + self.assertIsInstance(self.datetime_index, gt.ABCDatetimeIndex) + self.assertIsInstance(self.timedelta_index, gt.ABCTimedeltaIndex) + self.assertIsInstance(self.period_index, gt.ABCPeriodIndex) self.assertIsInstance(self.categorical_df.index, - com.ABCCategoricalIndex) - self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndexClass) - self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCIndexClass) - self.assertIsInstance(pd.Series([1, 2, 3]), com.ABCSeries) - self.assertIsInstance(self.df, com.ABCDataFrame) - self.assertIsInstance(self.df.to_panel(), com.ABCPanel) - self.assertIsInstance(self.sparse_series, com.ABCSparseSeries) - self.assertIsInstance(self.sparse_array, com.ABCSparseArray) - self.assertIsInstance(self.categorical, com.ABCCategorical) - self.assertIsInstance(pd.Period('2012', freq='A-DEC'), com.ABCPeriod) + gt.ABCCategoricalIndex) + self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndexClass) + self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCIndexClass) + self.assertIsInstance(pd.Series([1, 2, 3]), gt.ABCSeries) + self.assertIsInstance(self.df, gt.ABCDataFrame) + self.assertIsInstance(self.df.to_panel(), gt.ABCPanel) + self.assertIsInstance(self.sparse_series, gt.ABCSparseSeries) + self.assertIsInstance(self.sparse_array, gt.ABCSparseArray) + self.assertIsInstance(self.categorical, gt.ABCCategorical) + self.assertIsInstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) if __name__ == '__main__': diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py new file mode 100644 index 0000000000000..34d10ee9dfa42 --- /dev/null +++ b/pandas/tests/types/test_inference.py @@ -0,0 +1,820 @@ +# -*- coding: utf-8 -*- + +""" +These test the public routines exposed in types/common.py +related to inference and not otherwise tested in types/test_common.py + +""" + +import nose +import collections +import re +from datetime import datetime, date, timedelta, time +import numpy as np + +import pandas as pd +from pandas import lib, tslib +from pandas import (Series, Index, DataFrame, Timedelta, + DatetimeIndex, TimedeltaIndex, Timestamp, + Panel, Period) +from pandas.compat import u, PY2, lrange +from pandas.types import inference +from pandas.types.common import (is_timedelta64_dtype, + is_timedelta64_ns_dtype, + is_number, + is_integer, + is_float, + is_bool, + is_scalar, + _ensure_int32) +from pandas.types.missing import isnull +from pandas.util import testing as tm + +_multiprocess_can_split_ = True + + +def test_is_sequence(): + is_seq = inference.is_sequence + assert (is_seq((1, 2))) + assert (is_seq([1, 2])) + assert (not is_seq("abcd")) + assert (not is_seq(u("abcd"))) + assert (not is_seq(np.int64)) + + class A(object): + + def __getitem__(self): + return 1 + + assert (not is_seq(A())) + + +def test_is_list_like(): + passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), + Series([]), Series(['a']).str) + fails = (1, '2', object()) + + for p in passes: + assert inference.is_list_like(p) + + for f in 
fails: + assert not inference.is_list_like(f) + + +def test_is_dict_like(): + passes = [{}, {'A': 1}, Series([1])] + fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])] + + for p in passes: + assert inference.is_dict_like(p) + + for f in fails: + assert not inference.is_dict_like(f) + + +def test_is_named_tuple(): + passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), ) + fails = ((1, 2, 3), 'a', Series({'pi': 3.14})) + + for p in passes: + assert inference.is_named_tuple(p) + + for f in fails: + assert not inference.is_named_tuple(f) + + +def test_is_hashable(): + + # all new-style classes are hashable by default + class HashableClass(object): + pass + + class UnhashableClass1(object): + __hash__ = None + + class UnhashableClass2(object): + + def __hash__(self): + raise TypeError("Not hashable") + + hashable = (1, + 3.14, + np.float64(3.14), + 'a', + tuple(), + (1, ), + HashableClass(), ) + not_hashable = ([], UnhashableClass1(), ) + abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), ) + + for i in hashable: + assert inference.is_hashable(i) + for i in not_hashable: + assert not inference.is_hashable(i) + for i in abc_hashable_not_really_hashable: + assert not inference.is_hashable(i) + + # numpy.array is no longer collections.Hashable as of + # https://github.com/numpy/numpy/pull/5326, just test + # is_hashable() + assert not inference.is_hashable(np.array([])) + + # old-style classes in Python 2 don't appear hashable to + # collections.Hashable but also seem to support hash() by default + if PY2: + + class OldStyleClass(): + pass + + c = OldStyleClass() + assert not isinstance(c, collections.Hashable) + assert inference.is_hashable(c) + hash(c) # this will not raise + + +def test_is_re(): + passes = re.compile('ad'), + fails = 'x', 2, 3, object() + + for p in passes: + assert inference.is_re(p) + + for f in fails: + assert not inference.is_re(f) + + +def test_is_recompilable(): + passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'), + re.compile(r'')) + fails = 1, [], object() + + for p in passes: + assert inference.is_re_compilable(p) + + for f in fails: + assert not inference.is_re_compilable(f) + + +class TestInference(tm.TestCase): + + def test_infer_dtype_bytes(self): + compare = 'string' if PY2 else 'bytes' + + # string array of bytes + arr = np.array(list('abc'), dtype='S1') + self.assertEqual(lib.infer_dtype(arr), compare) + + # object array of bytes + arr = arr.astype(object) + self.assertEqual(lib.infer_dtype(arr), compare) + + def test_isinf_scalar(self): + # GH 11352 + self.assertTrue(lib.isposinf_scalar(float('inf'))) + self.assertTrue(lib.isposinf_scalar(np.inf)) + self.assertFalse(lib.isposinf_scalar(-np.inf)) + self.assertFalse(lib.isposinf_scalar(1)) + self.assertFalse(lib.isposinf_scalar('a')) + + self.assertTrue(lib.isneginf_scalar(float('-inf'))) + self.assertTrue(lib.isneginf_scalar(-np.inf)) + self.assertFalse(lib.isneginf_scalar(np.inf)) + self.assertFalse(lib.isneginf_scalar(1)) + self.assertFalse(lib.isneginf_scalar('a')) + + def test_maybe_convert_numeric_infinities(self): + # see gh-13274 + infinities = ['inf', 'inF', 'iNf', 'Inf', + 'iNF', 'InF', 'INf', 'INF'] + na_values = set(['', 'NULL', 'nan']) + + pos = np.array(['inf'], dtype=np.float64) + neg = np.array(['-inf'], dtype=np.float64) + + msg = "Unable to parse string" + + for infinity in infinities: + for maybe_int in (True, False): + out = lib.maybe_convert_numeric( + np.array([infinity], dtype=object), + na_values, maybe_int) + 
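For orientation, the behaviour these predicate tests pin down, in the same private `pandas.types.inference` namespace the file imports:

    from pandas.types import inference

    assert inference.is_list_like([1, 2])      # containers qualify
    assert not inference.is_list_like('2')     # strings deliberately do not
    assert inference.is_dict_like({'A': 1})    # dict-like access counts
    assert not inference.is_hashable([])       # mutable containers are excluded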
tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(['-' + infinity], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, neg) + + out = lib.maybe_convert_numeric( + np.array([u(infinity)], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(['+' + infinity], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, pos) + + # too many characters + with tm.assertRaisesRegexp(ValueError, msg): + lib.maybe_convert_numeric( + np.array(['foo_' + infinity], dtype=object), + na_values, maybe_int) + + def test_maybe_convert_numeric_post_floatify_nan(self): + # see gh-13314 + data = np.array(['1.200', '-999.000', '4.500'], dtype=object) + expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) + nan_values = set([-999, -999.0]) + + for coerce_type in (True, False): + out = lib.maybe_convert_numeric(data, nan_values, coerce_type) + tm.assert_numpy_array_equal(out, expected) + + def test_convert_infs(self): + arr = np.array(['inf', 'inf', 'inf'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False) + self.assertTrue(result.dtype == np.float64) + + arr = np.array(['-inf', '-inf', '-inf'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False) + self.assertTrue(result.dtype == np.float64) + + def test_scientific_no_exponent(self): + # See PR 12215 + arr = np.array(['42E', '2E', '99e', '6e'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False, True) + self.assertTrue(np.all(np.isnan(result))) + + def test_convert_non_hashable(self): + # GH13324 + # make sure that we are handling non-hashables + arr = np.array([[10.0, 2], 1.0, 'apple']) + result = lib.maybe_convert_numeric(arr, set(), False, True) + tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) + + +class TestTypeInference(tm.TestCase): + _multiprocess_can_split_ = True + + def test_length_zero(self): + result = lib.infer_dtype(np.array([], dtype='i4')) + self.assertEqual(result, 'integer') + + result = lib.infer_dtype([]) + self.assertEqual(result, 'empty') + + def test_integers(self): + arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'integer') + + arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed-integer') + + arr = np.array([1, 2, 3, 4, 5], dtype='i4') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'integer') + + def test_bools(self): + arr = np.array([True, False, True, True, True], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'boolean') + + arr = np.array([np.bool_(True), np.bool_(False)], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'boolean') + + arr = np.array([True, False, True, 'foo'], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed') + + arr = np.array([True, False, True], dtype=bool) + result = lib.infer_dtype(arr) + self.assertEqual(result, 'boolean') + + def test_floats(self): + arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'floating') + + arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'], + dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed-integer') + + arr = np.array([1, 2, 3, 4, 5], dtype='f4') + result = lib.infer_dtype(arr) + self.assertEqual(result, 
'floating') + + arr = np.array([1, 2, 3, 4, 5], dtype='f8') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'floating') + + def test_string(self): + pass + + def test_unicode(self): + pass + + def test_datetime(self): + + dates = [datetime(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + self.assertEqual(index.inferred_type, 'datetime64') + + def test_infer_dtype_datetime(self): + + arr = np.array([Timestamp('2011-01-01'), + Timestamp('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([np.datetime64('2011-01-01'), + np.datetime64('2011-01-01')], dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, pd.Timestamp('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, np.datetime64('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + arr = np.array([n, datetime(2011, 1, 1)]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, pd.Timestamp('2011-01-02'), n]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, np.datetime64('2011-01-02'), n]) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + arr = np.array([n, datetime(2011, 1, 1), n]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + # different type of nat + arr = np.array([np.timedelta64('nat'), + np.datetime64('2011-01-02')], dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.datetime64('2011-01-02'), + np.timedelta64('nat')], dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + # mixed datetime + arr = np.array([datetime(2011, 1, 1), + pd.Timestamp('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + # should be datetime? 
+ arr = np.array([np.datetime64('2011-01-01'), + pd.Timestamp('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([pd.Timestamp('2011-01-02'), + np.datetime64('2011-01-01')]) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1]) + self.assertEqual(lib.infer_dtype(arr), 'mixed-integer') + + arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1]) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + def test_infer_dtype_timedelta(self): + + arr = np.array([pd.Timedelta('1 days'), + pd.Timedelta('2 days')]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + arr = np.array([np.timedelta64(1, 'D'), + np.timedelta64(2, 'D')], dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + arr = np.array([timedelta(1), timedelta(2)]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, Timedelta('1 days')]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, np.timedelta64(1, 'D')]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, timedelta(1)]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, pd.Timedelta('1 days'), n]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, np.timedelta64(1, 'D'), n]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, timedelta(1), n]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + # different type of nat + arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')], + dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')], + dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + def test_infer_dtype_all_nan_nat_like(self): + arr = np.array([np.nan, np.nan]) + self.assertEqual(lib.infer_dtype(arr), 'floating') + + # a mix of nan and None results in 'mixed' + arr = np.array([np.nan, np.nan, None]) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([None, np.nan, np.nan]) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + # pd.NaT + arr = np.array([pd.NaT]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([pd.NaT, np.nan]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([np.nan, pd.NaT]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([np.nan, pd.NaT, np.nan]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([None, pd.NaT, None]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + # np.datetime64(nat) + arr = np.array([np.datetime64('nat')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.datetime64('nat'), n]) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + arr = np.array([pd.NaT, n, np.datetime64('nat'), n]) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + arr = np.array([np.timedelta64('nat')], dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.timedelta64('nat'), n]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + arr = np.array([pd.NaT, n, np.timedelta64('nat'), n]) + self.assertEqual(lib.infer_dtype(arr), 'timedelta') + + # datetime / timedelta mixed 
+ arr = np.array([pd.NaT, np.datetime64('nat'), + np.timedelta64('nat'), np.nan]) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.timedelta64('nat'), np.datetime64('nat')], + dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + def test_is_datetimelike_array_all_nan_nat_like(self): + arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) + self.assertTrue(lib.is_datetime_array(arr)) + self.assertTrue(lib.is_datetime64_array(arr)) + self.assertFalse(lib.is_timedelta_array(arr)) + self.assertFalse(lib.is_timedelta64_array(arr)) + self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) + self.assertFalse(lib.is_datetime_array(arr)) + self.assertFalse(lib.is_datetime64_array(arr)) + self.assertTrue(lib.is_timedelta_array(arr)) + self.assertTrue(lib.is_timedelta64_array(arr)) + self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), + np.timedelta64('nat')]) + self.assertFalse(lib.is_datetime_array(arr)) + self.assertFalse(lib.is_datetime64_array(arr)) + self.assertFalse(lib.is_timedelta_array(arr)) + self.assertFalse(lib.is_timedelta64_array(arr)) + self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT]) + self.assertTrue(lib.is_datetime_array(arr)) + self.assertTrue(lib.is_datetime64_array(arr)) + self.assertTrue(lib.is_timedelta_array(arr)) + self.assertTrue(lib.is_timedelta64_array(arr)) + self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, np.nan], dtype=object) + self.assertFalse(lib.is_datetime_array(arr)) + self.assertFalse(lib.is_datetime64_array(arr)) + self.assertFalse(lib.is_timedelta_array(arr)) + self.assertFalse(lib.is_timedelta64_array(arr)) + self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + + def test_date(self): + + dates = [date(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + self.assertEqual(index.inferred_type, 'date') + + def test_to_object_array_tuples(self): + r = (5, 6) + values = [r] + result = lib.to_object_array_tuples(values) + + try: + # make sure record array works + from collections import namedtuple + record = namedtuple('record', 'x y') + r = record(5, 6) + values = [r] + result = lib.to_object_array_tuples(values) # noqa + except ImportError: + pass + + def test_object(self): + + # GH 7431 + # cannot infer more than this as only a single element + arr = np.array([None], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed') + + def test_to_object_array_width(self): + # see gh-13320 + rows = [[1, 2, 3], [4, 5, 6]] + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows, min_width=1) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array([[1, 2, 3, None, None], + [4, 5, 6, None, None]], dtype=object) + out = lib.to_object_array(rows, min_width=5) + tm.assert_numpy_array_equal(out, expected) + + def test_is_period(self): + self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) + self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) + self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) + self.assertFalse(lib.is_period(1)) + self.assertFalse(lib.is_period(np.nan)) + + def test_categorical(self): + + # GH 8974 + from pandas import Categorical, Series + arr = Categorical(list('abc')) + result 
= lib.infer_dtype(arr) + self.assertEqual(result, 'categorical') + + result = lib.infer_dtype(Series(arr)) + self.assertEqual(result, 'categorical') + + arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) + result = lib.infer_dtype(arr) + self.assertEqual(result, 'categorical') + + result = lib.infer_dtype(Series(arr)) + self.assertEqual(result, 'categorical') + + +class TestNumberScalar(tm.TestCase): + + def test_is_number(self): + + self.assertTrue(is_number(True)) + self.assertTrue(is_number(1)) + self.assertTrue(is_number(1.1)) + self.assertTrue(is_number(1 + 3j)) + self.assertTrue(is_number(np.bool(False))) + self.assertTrue(is_number(np.int64(1))) + self.assertTrue(is_number(np.float64(1.1))) + self.assertTrue(is_number(np.complex128(1 + 3j))) + self.assertTrue(is_number(np.nan)) + + self.assertFalse(is_number(None)) + self.assertFalse(is_number('x')) + self.assertFalse(is_number(datetime(2011, 1, 1))) + self.assertFalse(is_number(np.datetime64('2011-01-01'))) + self.assertFalse(is_number(Timestamp('2011-01-01'))) + self.assertFalse(is_number(Timestamp('2011-01-01', + tz='US/Eastern'))) + self.assertFalse(is_number(timedelta(1000))) + self.assertFalse(is_number(Timedelta('1 days'))) + + # questionable + self.assertFalse(is_number(np.bool_(False))) + self.assertTrue(is_number(np.timedelta64(1, 'D'))) + + def test_is_bool(self): + self.assertTrue(is_bool(True)) + self.assertTrue(is_bool(np.bool(False))) + self.assertTrue(is_bool(np.bool_(False))) + + self.assertFalse(is_bool(1)) + self.assertFalse(is_bool(1.1)) + self.assertFalse(is_bool(1 + 3j)) + self.assertFalse(is_bool(np.int64(1))) + self.assertFalse(is_bool(np.float64(1.1))) + self.assertFalse(is_bool(np.complex128(1 + 3j))) + self.assertFalse(is_bool(np.nan)) + self.assertFalse(is_bool(None)) + self.assertFalse(is_bool('x')) + self.assertFalse(is_bool(datetime(2011, 1, 1))) + self.assertFalse(is_bool(np.datetime64('2011-01-01'))) + self.assertFalse(is_bool(Timestamp('2011-01-01'))) + self.assertFalse(is_bool(Timestamp('2011-01-01', + tz='US/Eastern'))) + self.assertFalse(is_bool(timedelta(1000))) + self.assertFalse(is_bool(np.timedelta64(1, 'D'))) + self.assertFalse(is_bool(Timedelta('1 days'))) + + def test_is_integer(self): + self.assertTrue(is_integer(1)) + self.assertTrue(is_integer(np.int64(1))) + + self.assertFalse(is_integer(True)) + self.assertFalse(is_integer(1.1)) + self.assertFalse(is_integer(1 + 3j)) + self.assertFalse(is_integer(np.bool(False))) + self.assertFalse(is_integer(np.bool_(False))) + self.assertFalse(is_integer(np.float64(1.1))) + self.assertFalse(is_integer(np.complex128(1 + 3j))) + self.assertFalse(is_integer(np.nan)) + self.assertFalse(is_integer(None)) + self.assertFalse(is_integer('x')) + self.assertFalse(is_integer(datetime(2011, 1, 1))) + self.assertFalse(is_integer(np.datetime64('2011-01-01'))) + self.assertFalse(is_integer(Timestamp('2011-01-01'))) + self.assertFalse(is_integer(Timestamp('2011-01-01', + tz='US/Eastern'))) + self.assertFalse(is_integer(timedelta(1000))) + self.assertFalse(is_integer(Timedelta('1 days'))) + + # questionable + self.assertTrue(is_integer(np.timedelta64(1, 'D'))) + + def test_is_float(self): + self.assertTrue(is_float(1.1)) + self.assertTrue(is_float(np.float64(1.1))) + self.assertTrue(is_float(np.nan)) + + self.assertFalse(is_float(True)) + self.assertFalse(is_float(1)) + self.assertFalse(is_float(1 + 3j)) + self.assertFalse(is_float(np.bool(False))) + self.assertFalse(is_float(np.bool_(False))) + self.assertFalse(is_float(np.int64(1))) + 
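A compact summary of the scalar predicates exercised in this class, including the asymmetries the inline comments flag as questionable:

    import numpy as np
    from pandas.types.common import is_number, is_integer, is_bool, is_float

    assert is_number(1 + 3j)               # complex counts as a number
    assert not is_number(np.bool_(False))  # a numpy bool does not (questionable)
    assert is_bool(np.bool_(False))
    assert not is_integer(True)            # Python bools are not integers here
    assert is_float(np.nan)                # NaN is a float scalar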
self.assertFalse(is_float(np.complex128(1 + 3j))) + self.assertFalse(is_float(None)) + self.assertFalse(is_float('x')) + self.assertFalse(is_float(datetime(2011, 1, 1))) + self.assertFalse(is_float(np.datetime64('2011-01-01'))) + self.assertFalse(is_float(Timestamp('2011-01-01'))) + self.assertFalse(is_float(Timestamp('2011-01-01', + tz='US/Eastern'))) + self.assertFalse(is_float(timedelta(1000))) + self.assertFalse(is_float(np.timedelta64(1, 'D'))) + self.assertFalse(is_float(Timedelta('1 days'))) + + def test_is_timedelta(self): + self.assertTrue(is_timedelta64_dtype('timedelta64')) + self.assertTrue(is_timedelta64_dtype('timedelta64[ns]')) + self.assertFalse(is_timedelta64_ns_dtype('timedelta64')) + self.assertTrue(is_timedelta64_ns_dtype('timedelta64[ns]')) + + tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') + self.assertTrue(is_timedelta64_dtype(tdi)) + self.assertTrue(is_timedelta64_ns_dtype(tdi)) + self.assertTrue(is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) + + # Conversion to Int64Index: + self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64'))) + self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]'))) + + +class Testisscalar(tm.TestCase): + + def test_isscalar_builtin_scalars(self): + self.assertTrue(is_scalar(None)) + self.assertTrue(is_scalar(True)) + self.assertTrue(is_scalar(False)) + self.assertTrue(is_scalar(0.)) + self.assertTrue(is_scalar(np.nan)) + self.assertTrue(is_scalar('foobar')) + self.assertTrue(is_scalar(b'foobar')) + self.assertTrue(is_scalar(u('efoobar'))) + self.assertTrue(is_scalar(datetime(2014, 1, 1))) + self.assertTrue(is_scalar(date(2014, 1, 1))) + self.assertTrue(is_scalar(time(12, 0))) + self.assertTrue(is_scalar(timedelta(hours=1))) + self.assertTrue(is_scalar(pd.NaT)) + + def test_isscalar_builtin_nonscalars(self): + self.assertFalse(is_scalar({})) + self.assertFalse(is_scalar([])) + self.assertFalse(is_scalar([1])) + self.assertFalse(is_scalar(())) + self.assertFalse(is_scalar((1, ))) + self.assertFalse(is_scalar(slice(None))) + self.assertFalse(is_scalar(Ellipsis)) + + def test_isscalar_numpy_array_scalars(self): + self.assertTrue(is_scalar(np.int64(1))) + self.assertTrue(is_scalar(np.float64(1.))) + self.assertTrue(is_scalar(np.int32(1))) + self.assertTrue(is_scalar(np.object_('foobar'))) + self.assertTrue(is_scalar(np.str_('foobar'))) + self.assertTrue(is_scalar(np.unicode_(u('foobar')))) + self.assertTrue(is_scalar(np.bytes_(b'foobar'))) + self.assertTrue(is_scalar(np.datetime64('2014-01-01'))) + self.assertTrue(is_scalar(np.timedelta64(1, 'h'))) + + def test_isscalar_numpy_zerodim_arrays(self): + for zerodim in [np.array(1), np.array('foobar'), + np.array(np.datetime64('2014-01-01')), + np.array(np.timedelta64(1, 'h')), + np.array(np.datetime64('NaT'))]: + self.assertFalse(is_scalar(zerodim)) + self.assertTrue(is_scalar(lib.item_from_zerodim(zerodim))) + + def test_isscalar_numpy_arrays(self): + self.assertFalse(is_scalar(np.array([]))) + self.assertFalse(is_scalar(np.array([[]]))) + self.assertFalse(is_scalar(np.matrix('1; 2'))) + + def test_isscalar_pandas_scalars(self): + self.assertTrue(is_scalar(Timestamp('2014-01-01'))) + self.assertTrue(is_scalar(Timedelta(hours=1))) + self.assertTrue(is_scalar(Period('2014-01-01'))) + + def test_isscalar_pandas_containers(self): + self.assertFalse(is_scalar(Series())) + self.assertFalse(is_scalar(Series([1]))) + self.assertFalse(is_scalar(DataFrame())) + self.assertFalse(is_scalar(DataFrame([[1]]))) + self.assertFalse(is_scalar(Panel())) + 
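The renamed public `is_scalar` keeps the semantics of the old `lib.isscalar` seen earlier in this diff; in short:

    import numpy as np
    import pandas as pd
    from pandas.types.common import is_scalar

    assert is_scalar(pd.NaT)                       # missing-value sentinels count
    assert is_scalar(np.datetime64('2014-01-01'))
    assert not is_scalar(np.array(1))              # 0-d arrays still do not
    assert not is_scalar(pd.Series([1]))           # nor do containers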
self.assertFalse(is_scalar(Panel([[[1]]]))) + self.assertFalse(is_scalar(Index([]))) + self.assertFalse(is_scalar(Index([1]))) + + +def test_datetimeindex_from_empty_datetime64_array(): + for unit in ['ms', 'us', 'ns']: + idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) + assert (len(idx) == 0) + + +def test_nan_to_nat_conversions(): + + df = DataFrame(dict({ + 'A': np.asarray( + lrange(10), dtype='float64'), + 'B': Timestamp('20010101') + })) + df.iloc[3:6, :] = np.nan + result = df.loc[4, 'B'].value + assert (result == tslib.iNaT) + + s = df['B'].copy() + s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) + assert (isnull(s[8])) + + # numpy < 1.7.0 is wrong + from distutils.version import LooseVersion + if LooseVersion(np.__version__) >= '1.7.0': + assert (s[8].value == np.datetime64('NaT').astype(np.int64)) + + +def test_ensure_int32(): + values = np.arange(10, dtype=np.int32) + result = _ensure_int32(values) + assert (result.dtype == np.int32) + + values = np.arange(10, dtype=np.int64) + result = _ensure_int32(values) + assert (result.dtype == np.int32) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_io.py b/pandas/tests/types/test_io.py new file mode 100644 index 0000000000000..545edf8f1386c --- /dev/null +++ b/pandas/tests/types/test_io.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas.lib as lib +import pandas.util.testing as tm + +from pandas.compat import long, u + + +class TestParseSQL(tm.TestCase): + + def test_convert_sql_column_floats(self): + arr = np.array([1.5, None, 3, 4.2], dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_strings(self): + arr = np.array(['1.5', None, '3', '4.2'], dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_unicode(self): + arr = np.array([u('1.5'), None, u('3'), u('4.2')], + dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], + dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_ints(self): + arr = np.array([1, 2, 3, 4], dtype='O') + arr2 = np.array([1, 2, 3, 4], dtype='i4').astype('O') + result = lib.convert_sql_column(arr) + result2 = lib.convert_sql_column(arr2) + expected = np.array([1, 2, 3, 4], dtype='i8') + self.assert_numpy_array_equal(result, expected) + self.assert_numpy_array_equal(result2, expected) + + arr = np.array([1, 2, 3, None, 4], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_longs(self): + arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, 4], dtype='i8') + self.assert_numpy_array_equal(result, expected) + + arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_bools(self): + arr = np.array([True, False, True, False], dtype='O') + result = 
lib.convert_sql_column(arr) + expected = np.array([True, False, True, False], dtype=bool) + self.assert_numpy_array_equal(result, expected) + + arr = np.array([True, False, None, False], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([True, False, np.nan, False], dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_decimals(self): + from decimal import Decimal + arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) + result = lib.convert_sql_column(arr) + expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_downcast_int64(self): + from pandas.parser import na_values + + arr = np.array([1, 2, 7, 8, 10], dtype=np.int64) + expected = np.array([1, 2, 7, 8, 10], dtype=np.int8) + + # default argument + result = lib.downcast_int64(arr, na_values) + self.assert_numpy_array_equal(result, expected) + + result = lib.downcast_int64(arr, na_values, use_unsigned=False) + self.assert_numpy_array_equal(result, expected) + + expected = np.array([1, 2, 7, 8, 10], dtype=np.uint8) + result = lib.downcast_int64(arr, na_values, use_unsigned=True) + self.assert_numpy_array_equal(result, expected) + + # still cast to int8 despite use_unsigned=True + # because of the negative number as an element + arr = np.array([1, 2, -7, 8, 10], dtype=np.int64) + expected = np.array([1, 2, -7, 8, 10], dtype=np.int8) + result = lib.downcast_int64(arr, na_values, use_unsigned=True) + self.assert_numpy_array_equal(result, expected) + + arr = np.array([1, 2, 7, 8, 300], dtype=np.int64) + expected = np.array([1, 2, 7, 8, 300], dtype=np.int16) + result = lib.downcast_int64(arr, na_values) + self.assert_numpy_array_equal(result, expected) + + int8_na = na_values[np.int8] + int64_na = na_values[np.int64] + arr = np.array([int64_na, 2, 3, 10, 15], dtype=np.int64) + expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8) + result = lib.downcast_int64(arr, na_values) + self.assert_numpy_array_equal(result, expected) + + +if __name__ == '__main__': + import nose + + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py new file mode 100644 index 0000000000000..edcb69de7bfad --- /dev/null +++ b/pandas/tests/types/test_missing.py @@ -0,0 +1,243 @@ +# -*- coding: utf-8 -*- + +import nose +import numpy as np +from datetime import datetime +from pandas.util import testing as tm + +from pandas.core import config as cf +from pandas.compat import u +from pandas.tslib import iNaT +from pandas import (NaT, Float64Index, Series, + DatetimeIndex, TimedeltaIndex, date_range) +from pandas.types.dtypes import DatetimeTZDtype +from pandas.types.missing import (array_equivalent, isnull, notnull, + na_value_for_dtype) + +_multiprocess_can_split_ = True + + +def test_notnull(): + assert notnull(1.) 
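The downcast helper used by the parsers picks the narrowest integer width that holds the data, preferring unsigned types only when asked and only when no value is negative; sketched from the cases above:

    import numpy as np
    import pandas.lib as lib
    from pandas.parser import na_values

    arr = np.array([1, 2, 7, 8, 300], dtype=np.int64)
    assert lib.downcast_int64(arr, na_values).dtype == np.int16  # 300 needs 16 bits

    arr = np.array([1, 2, 7, 8, 10], dtype=np.int64)
    out = lib.downcast_int64(arr, na_values, use_unsigned=True)
    assert out.dtype == np.uint8    # all values non-negative, so uint8 fits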
+ assert not notnull(None) + assert not notnull(np.NaN) + + with cf.option_context("mode.use_inf_as_null", False): + assert notnull(np.inf) + assert notnull(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notnull(arr) + assert result.all() + + with cf.option_context("mode.use_inf_as_null", True): + assert not notnull(np.inf) + assert not notnull(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notnull(arr) + assert result.sum() == 2 + + with cf.option_context("mode.use_inf_as_null", False): + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries(), tm.makeTimeSeries(), + tm.makePeriodSeries()]: + assert (isinstance(isnull(s), Series)) + + +def test_isnull(): + assert not isnull(1.) + assert isnull(None) + assert isnull(np.NaN) + assert not isnull(np.inf) + assert not isnull(-np.inf) + + # series + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries(), tm.makeTimeSeries(), + tm.makePeriodSeries()]: + assert (isinstance(isnull(s), Series)) + + # frame + for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), + tm.makeMixedDataFrame()]: + result = isnull(df) + expected = df.apply(isnull) + tm.assert_frame_equal(result, expected) + + # panel + for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel()) + ]: + result = isnull(p) + expected = p.apply(isnull) + tm.assert_panel_equal(result, expected) + + # panel 4d + for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: + result = isnull(p) + expected = p.apply(isnull) + tm.assert_panel4d_equal(result, expected) + + +def test_isnull_lists(): + result = isnull([[False]]) + exp = np.array([[False]]) + assert (np.array_equal(result, exp)) + + result = isnull([[1], [2]]) + exp = np.array([[False], [False]]) + assert (np.array_equal(result, exp)) + + # list of strings / unicode + result = isnull(['foo', 'bar']) + assert (not result.any()) + + result = isnull([u('foo'), u('bar')]) + assert (not result.any()) + + +def test_isnull_nat(): + result = isnull([NaT]) + exp = np.array([True]) + assert (np.array_equal(result, exp)) + + result = isnull(np.array([NaT], dtype=object)) + exp = np.array([True]) + assert (np.array_equal(result, exp)) + + +def test_isnull_numpy_nat(): + arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), + np.datetime64('NaT', 's')]) + result = isnull(arr) + expected = np.array([True] * 4) + tm.assert_numpy_array_equal(result, expected) + + +def test_isnull_datetime(): + assert (not isnull(datetime.now())) + assert notnull(datetime.now()) + + idx = date_range('1/1/1990', periods=20) + assert (notnull(idx).all()) + + idx = np.asarray(idx) + idx[0] = iNaT + idx = DatetimeIndex(idx) + mask = isnull(idx) + assert (mask[0]) + assert (not mask[1:].any()) + + # GH 9129 + pidx = idx.to_period(freq='M') + mask = isnull(pidx) + assert (mask[0]) + assert (not mask[1:].any()) + + mask = isnull(pidx[1:]) + assert (not mask.any()) + + +class TestIsNull(tm.TestCase): + + def test_0d_array(self): + self.assertTrue(isnull(np.array(np.nan))) + self.assertFalse(isnull(np.array(0.0))) + self.assertFalse(isnull(np.array(0))) + # test object dtype + self.assertTrue(isnull(np.array(np.nan, dtype=object))) + self.assertFalse(isnull(np.array(0.0, dtype=object))) + self.assertFalse(isnull(np.array(0, dtype=object))) + + +def test_array_equivalent(): + assert array_equivalent(np.array([np.nan, np.nan]), + np.array([np.nan, np.nan])) + assert array_equivalent(np.array([np.nan, 1, np.nan]), + np.array([np.nan, 1, np.nan])) + assert 
array_equivalent(np.array([np.nan, None], dtype='object'), + np.array([np.nan, None], dtype='object')) + assert array_equivalent(np.array([np.nan, 1 + 1j], dtype='complex'), + np.array([np.nan, 1 + 1j], dtype='complex')) + assert not array_equivalent( + np.array([np.nan, 1 + 1j], dtype='complex'), np.array( + [np.nan, 1 + 2j], dtype='complex')) + assert not array_equivalent( + np.array([np.nan, 1, np.nan]), np.array([np.nan, 2, np.nan])) + assert not array_equivalent( + np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e'])) + assert array_equivalent(Float64Index([0, np.nan]), + Float64Index([0, np.nan])) + assert not array_equivalent( + Float64Index([0, np.nan]), Float64Index([1, np.nan])) + assert array_equivalent(DatetimeIndex([0, np.nan]), + DatetimeIndex([0, np.nan])) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan])) + assert array_equivalent(TimedeltaIndex([0, np.nan]), + TimedeltaIndex([0, np.nan])) + assert not array_equivalent( + TimedeltaIndex([0, np.nan]), TimedeltaIndex([1, np.nan])) + assert array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'), + DatetimeIndex([0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex( + [1, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex( + [0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz='CET'), DatetimeIndex( + [0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) + + +def test_array_equivalent_compat(): + # see gh-13388 + m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) + n = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) + assert (array_equivalent(m, n, strict_nan=True)) + assert (array_equivalent(m, n, strict_nan=False)) + + m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) + n = np.array([(1, 2), (4, 3)], dtype=[('a', int), ('b', float)]) + assert (not array_equivalent(m, n, strict_nan=True)) + assert (not array_equivalent(m, n, strict_nan=False)) + + m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) + n = np.array([(1, 2), (3, 4)], dtype=[('b', int), ('a', float)]) + assert (not array_equivalent(m, n, strict_nan=True)) + assert (not array_equivalent(m, n, strict_nan=False)) + + +def test_array_equivalent_str(): + for dtype in ['O', 'S', 'U']: + assert array_equivalent(np.array(['A', 'B'], dtype=dtype), + np.array(['A', 'B'], dtype=dtype)) + assert not array_equivalent(np.array(['A', 'B'], dtype=dtype), + np.array(['A', 'X'], dtype=dtype)) + + +def test_na_value_for_dtype(): + for dtype in [np.dtype('M8[ns]'), np.dtype('m8[ns]'), + DatetimeTZDtype('datetime64[ns, US/Eastern]')]: + assert na_value_for_dtype(dtype) is NaT + + for dtype in ['u1', 'u2', 'u4', 'u8', + 'i1', 'i2', 'i4', 'i8']: + assert na_value_for_dtype(np.dtype(dtype)) == 0 + + for dtype in ['bool']: + assert na_value_for_dtype(np.dtype(dtype)) is False + + for dtype in ['f2', 'f4', 'f8']: + assert np.isnan(na_value_for_dtype(np.dtype(dtype))) + + for dtype in ['O']: + assert np.isnan(na_value_for_dtype(np.dtype(dtype))) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_types.py b/pandas/tests/types/test_types.py deleted file mode 100644 index b9f6006cab731..0000000000000 --- a/pandas/tests/types/test_types.py +++ /dev/null @@ -1,40 +0,0 @@ 
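Summing up the two helpers: `array_equivalent` treats NaNs in matching positions as equal but never conflates dtypes or timezones, and `na_value_for_dtype` yields the sentinel appropriate to each dtype. For instance:

    import numpy as np
    from pandas import NaT
    from pandas.types.missing import array_equivalent, na_value_for_dtype

    assert array_equivalent(np.array([np.nan, 1.]), np.array([np.nan, 1.]))
    assert not array_equivalent(np.array([np.nan, 1.]), np.array([np.nan, 2.]))

    assert na_value_for_dtype(np.dtype('M8[ns]')) is NaT   # datetimes -> NaT
    assert na_value_for_dtype(np.dtype('i8')) == 0         # integers -> 0
    assert np.isnan(na_value_for_dtype(np.dtype('f8')))    # floats -> NaN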
-# -*- coding: utf-8 -*- -import nose -import numpy as np - -from pandas import NaT -from pandas.types.api import (DatetimeTZDtype, CategoricalDtype, - na_value_for_dtype, pandas_dtype) - - -def test_pandas_dtype(): - - assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype( - 'datetime64[ns, US/Eastern]') - assert pandas_dtype('category') == CategoricalDtype() - for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: - assert pandas_dtype(dtype) == np.dtype(dtype) - - -def test_na_value_for_dtype(): - for dtype in [np.dtype('M8[ns]'), np.dtype('m8[ns]'), - DatetimeTZDtype('datetime64[ns, US/Eastern]')]: - assert na_value_for_dtype(dtype) is NaT - - for dtype in ['u1', 'u2', 'u4', 'u8', - 'i1', 'i2', 'i4', 'i8']: - assert na_value_for_dtype(np.dtype(dtype)) == 0 - - for dtype in ['bool']: - assert na_value_for_dtype(np.dtype(dtype)) is False - - for dtype in ['f2', 'f4', 'f8']: - assert np.isnan(na_value_for_dtype(np.dtype(dtype))) - - for dtype in ['O']: - assert np.isnan(na_value_for_dtype(np.dtype(dtype))) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 075dff9cf6c38..5b66e55eb60b6 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -12,6 +12,21 @@ from pandas import (Categorical, DataFrame, Series, Index, MultiIndex, Timedelta) from pandas.core.frame import _merge_doc +from pandas.types.generic import ABCSeries +from pandas.types.common import (is_datetime64tz_dtype, + is_datetime64_dtype, + needs_i8_conversion, + is_int64_dtype, + is_integer, + is_int_or_datetime_dtype, + is_dtype_equal, + is_bool, + is_list_like, + _ensure_int64, + _ensure_platform_int, + _ensure_object) +from pandas.types.missing import na_value_for_dtype + from pandas.core.generic import NDFrame from pandas.core.index import (_get_combined_index, _ensure_index, _get_consensus_names, @@ -19,18 +34,10 @@ from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util.decorators import Appender, Substitution -from pandas.core.common import (ABCSeries, is_dtype_equal, - is_datetime64_dtype, - is_int64_dtype, - is_integer, - is_bool, - is_list_like, - needs_i8_conversion) import pandas.core.algorithms as algos import pandas.core.common as com import pandas.types.concat as _concat -from pandas.types.api import na_value_for_dtype import pandas.algos as _algos import pandas.hashtable as _hash @@ -436,7 +443,7 @@ def _merger(x, y): # if we DO have duplicates, then # we cannot guarantee order - sorter = com._ensure_platform_int( + sorter = _ensure_platform_int( np.concatenate([groupby.indices[g] for g, _ in groupby])) if len(result) != len(sorter): if check_duplicates: @@ -1111,8 +1118,8 @@ def _get_single_indexer(join_key, index, sort=False): left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) left_indexer, right_indexer = _algos.left_outer_join( - com._ensure_int64(left_key), - com._ensure_int64(right_key), + _ensure_int64(left_key), + _ensure_int64(right_key), count, sort=sort) return left_indexer, right_indexer @@ -1158,18 +1165,17 @@ def _right_outer_join(x, y, max_groups): def _factorize_keys(lk, rk, sort=True): - if com.is_datetime64tz_dtype(lk) and com.is_datetime64tz_dtype(rk): + if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): lk = lk.values rk = rk.values - - if com.is_int_or_datetime_dtype(lk) and com.is_int_or_datetime_dtype(rk): + if is_int_or_datetime_dtype(lk) and 
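# Aside: _factorize_keys (modified in the merge.py hunks here) encodes both
# join keys with one shared labeling so the join itself runs on int64 arrays.
# A hypothetical pure-Python sketch of that idea only; the real code uses the
# pandas.hashtable factorizer classes:
import numpy as np

def factorize_two(lk, rk):
    mapping, codes_out = {}, []
    for arr in (lk, rk):
        codes = np.empty(len(arr), dtype=np.int64)
        for i, val in enumerate(arr):
            # setdefault hands out sequential codes on first sight of a value
            codes[i] = mapping.setdefault(val, len(mapping))
        codes_out.append(codes)
    return codes_out[0], codes_out[1], len(mapping)

llab, rlab, count = factorize_two(['a', 'b', 'a'], ['b', 'c'])
print(llab, rlab, count)  # [0 1 0] [1 2] 3: 'b' gets the same code on both sides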
is_int_or_datetime_dtype(rk): klass = _hash.Int64Factorizer - lk = com._ensure_int64(com._values_from_object(lk)) - rk = com._ensure_int64(com._values_from_object(rk)) + lk = _ensure_int64(com._values_from_object(lk)) + rk = _ensure_int64(com._values_from_object(rk)) else: klass = _hash.Factorizer - lk = com._ensure_object(lk) - rk = com._ensure_object(rk) + lk = _ensure_object(lk) + rk = _ensure_object(rk) rizer = klass(max(len(lk), len(rk))) @@ -1208,10 +1214,10 @@ def _sort_labels(uniques, left, right): reverse_indexer = np.empty(len(sorter), dtype=np.int64) reverse_indexer.put(sorter, np.arange(len(sorter))) - new_left = reverse_indexer.take(com._ensure_platform_int(left)) + new_left = reverse_indexer.take(_ensure_platform_int(left)) np.putmask(new_left, left == -1, -1) - new_right = reverse_indexer.take(com._ensure_platform_int(right)) + new_right = reverse_indexer.take(_ensure_platform_int(right)) np.putmask(new_right, right == -1, -1) return new_left, new_right diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index e1405bc9e6add..3e2b7c3af460e 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -1,6 +1,7 @@ # pylint: disable=E1103 +from pandas.types.common import is_list_like, is_scalar from pandas import Series, DataFrame from pandas.core.index import MultiIndex, Index from pandas.core.groupby import Grouper @@ -9,7 +10,6 @@ from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com -import pandas.lib as lib import numpy as np @@ -95,7 +95,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', values_passed = values is not None if values_passed: - if com.is_list_like(values): + if is_list_like(values): values_multi = True values = list(values) else: @@ -361,7 +361,7 @@ def _all_key(): def _convert_by(by): if by is None: by = [] - elif (lib.isscalar(by) or + elif (is_scalar(by) or isinstance(by, (np.ndarray, Index, Series, Grouper)) or hasattr(by, '__call__')): by = [by] diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index b6c1926c1e7fc..4cf3364a03056 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -11,10 +11,17 @@ import numpy as np +from pandas.types.common import (is_list_like, + is_integer, + is_number, + is_hashable, + is_iterator) +from pandas.types.missing import isnull, notnull + from pandas.util.decorators import cache_readonly, deprecate_kwarg from pandas.core.base import PandasObject -import pandas.core.common as com -from pandas.core.common import AbstractMethodError + +from pandas.core.common import AbstractMethodError, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex from pandas.core.series import Series, remove_na @@ -161,7 +168,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', if colormap is not None: warnings.warn("'color' and 'colormap' cannot be used " "simultaneously. Using 'color'") - colors = list(color) if com.is_list_like(color) else color + colors = list(color) if is_list_like(color) else color else: if color_type == 'default': # need to call list() on the result to copy so we don't @@ -336,7 +343,7 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, # no gaps between subplots fig.subplots_adjust(wspace=0, hspace=0) - mask = com.notnull(df) + mask = notnull(df) marker = _get_marker_compat(marker) @@ -980,7 +987,7 @@ def _validate_color_args(self): "simultaneously. 
Using 'color'") if 'color' in self.kwds and self.style is not None: - if com.is_list_like(self.style): + if is_list_like(self.style): styles = self.style else: styles = [self.style] @@ -1001,7 +1008,7 @@ def _iter_data(self, data=None, keep_index=False, fillna=None): # TODO: unused? # if self.sort_columns: - # columns = com._try_sort(data.columns) + # columns = _try_sort(data.columns) # else: # columns = data.columns @@ -1099,13 +1106,13 @@ def result(self): Return result axes """ if self.subplots: - if self.layout is not None and not com.is_list_like(self.ax): + if self.layout is not None and not is_list_like(self.ax): return self.axes.reshape(*self.layout) else: return self.axes else: sec_true = isinstance(self.secondary_y, bool) and self.secondary_y - all_sec = (com.is_list_like(self.secondary_y) and + all_sec = (is_list_like(self.secondary_y) and len(self.secondary_y) == self.nseries) if (sec_true or all_sec): # if all data is plotted on secondary, return right axes @@ -1322,7 +1329,7 @@ def _get_xticks(self, convert_period=False): @classmethod def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): - mask = com.isnull(y) + mask = isnull(y) if mask.any(): y = np.ma.array(y) y = np.ma.masked_where(mask, y) @@ -1463,8 +1470,8 @@ def match_labels(data, e): err = np.atleast_2d(evalues) err = np.tile(err, (self.nseries, 1)) - elif com.is_list_like(err): - if com.is_iterator(err): + elif is_list_like(err): + if is_iterator(err): err = np.atleast_2d(list(err)) else: # raw error values @@ -1486,7 +1493,7 @@ def match_labels(data, e): if len(err) == 1: err = np.tile(err, (self.nseries, 1)) - elif com.is_number(err): + elif is_number(err): err = np.tile([err], (self.nseries, len(self.data))) else: @@ -1543,9 +1550,9 @@ def __init__(self, data, x, y, **kwargs): MPLPlot.__init__(self, data, **kwargs) if x is None or y is None: raise ValueError(self._kind + ' requires and x and y column') - if com.is_integer(x) and not self.data.columns.holds_integer(): + if is_integer(x) and not self.data.columns.holds_integer(): x = self.data.columns[x] - if com.is_integer(y) and not self.data.columns.holds_integer(): + if is_integer(y) and not self.data.columns.holds_integer(): y = self.data.columns[y] self.x = x self.y = y @@ -1569,7 +1576,7 @@ def __init__(self, data, x, y, s=None, c=None, **kwargs): # the handling of this argument later s = 20 super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs) - if com.is_integer(c) and not self.data.columns.holds_integer(): + if is_integer(c) and not self.data.columns.holds_integer(): c = self.data.columns[c] self.c = c @@ -1577,7 +1584,7 @@ def _make_plot(self): x, y, c, data = self.x, self.y, self.c, self.data ax = self.axes[0] - c_is_column = com.is_hashable(c) and c in self.data.columns + c_is_column = is_hashable(c) and c in self.data.columns # plot a colorbar only if a colormap is provided or necessary cb = self.kwds.pop('colorbar', self.colormap or c_is_column) @@ -1629,7 +1636,7 @@ class HexBinPlot(PlanePlot): def __init__(self, data, x, y, C=None, **kwargs): super(HexBinPlot, self).__init__(data, x, y, **kwargs) - if com.is_integer(C) and not self.data.columns.holds_integer(): + if is_integer(C) and not self.data.columns.holds_integer(): C = self.data.columns[C] self.C = C @@ -1912,9 +1919,9 @@ def __init__(self, data, **kwargs): self.ax_pos = self.tick_pos - self.tickoffset def _args_adjust(self): - if com.is_list_like(self.bottom): + if is_list_like(self.bottom): self.bottom = np.array(self.bottom) - if com.is_list_like(self.left): + if 
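# Aside: the isnull/masked-array hunk in MPLPlot._plot above is what makes
# line plots show gaps at missing points rather than interpolating across
# them. The same steps, in isolation:
import numpy as np

y = np.array([1.0, np.nan, 3.0])
mask = np.isnan(y)        # stands in for isnull() on a float column
y = np.ma.array(y)
y = np.ma.masked_where(mask, y)
print(y)                  # [1.0 -- 3.0]: the masked point is simply not drawn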
is_list_like(self.left): self.left = np.array(self.left) @classmethod @@ -2027,18 +2034,18 @@ def __init__(self, data, bins=10, bottom=0, **kwargs): MPLPlot.__init__(self, data, **kwargs) def _args_adjust(self): - if com.is_integer(self.bins): + if is_integer(self.bins): # create common bin edge values = (self.data._convert(datetime=True)._get_numeric_data()) values = np.ravel(values) - values = values[~com.isnull(values)] + values = values[~isnull(values)] hist, self.bins = np.histogram( values, bins=self.bins, range=self.kwds.get('range', None), weights=self.kwds.get('weights', None)) - if com.is_list_like(self.bottom): + if is_list_like(self.bottom): self.bottom = np.array(self.bottom) @classmethod @@ -2046,7 +2053,7 @@ def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, stacking_id=None, **kwds): if column_num == 0: cls._initialize_stacker(ax, stacking_id, len(bins) - 1) - y = y[~com.isnull(y)] + y = y[~isnull(y)] base = np.zeros(len(bins) - 1) bottom = bottom + \ @@ -2411,7 +2418,7 @@ def _plot(data, x=None, y=None, subplots=False, msg = "{0} requires either y column or 'subplots=True'" raise ValueError(msg.format(kind)) elif y is not None: - if com.is_integer(y) and not data.columns.holds_integer(): + if is_integer(y) and not data.columns.holds_integer(): y = data.columns[y] # converted to series actually. copy to not modify data = data[y].copy() @@ -2420,12 +2427,12 @@ def _plot(data, x=None, y=None, subplots=False, else: if isinstance(data, DataFrame): if x is not None: - if com.is_integer(x) and not data.columns.holds_integer(): + if is_integer(x) and not data.columns.holds_integer(): x = data.columns[x] data = data.set_index(x) if y is not None: - if com.is_integer(y) and not data.columns.holds_integer(): + if is_integer(y) and not data.columns.holds_integer(): y = data.columns[y] label = kwds['label'] if 'label' in kwds else y series = data[y].copy() # Don't modify @@ -2434,7 +2441,7 @@ def _plot(data, x=None, y=None, subplots=False, for kw in ['xerr', 'yerr']: if (kw in kwds) and \ (isinstance(kwds[kw], string_types) or - com.is_integer(kwds[kw])): + is_integer(kwds[kw])): try: kwds[kw] = data[kwds[kw]] except (IndexError, KeyError, TypeError): @@ -2897,7 +2904,7 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, layout=layout) _axes = _flatten(axes) - for i, col in enumerate(com._try_sort(data.columns)): + for i, col in enumerate(_try_sort(data.columns)): ax = _axes[i] ax.hist(data[col].dropna().values, bins=bins, **kwds) ax.set_title(col) @@ -3345,7 +3352,7 @@ def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, if ax is None: fig = plt.figure(**fig_kw) else: - if com.is_list_like(ax): + if is_list_like(ax): ax = _flatten(ax) if layout is not None: warnings.warn("When passing multiple axes, layout keyword is " @@ -3487,7 +3494,7 @@ def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): def _flatten(axes): - if not com.is_list_like(axes): + if not is_list_like(axes): return np.array([axes]) elif isinstance(axes, (np.ndarray, Index)): return axes.ravel() diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index b0bbf8ba70354..62bbfc2f630a5 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -2,12 +2,14 @@ Quantilization functions and related stuff """ +from pandas.types.missing import isnull +from pandas.types.common import (is_float, is_integer, + is_scalar) + from pandas.core.api import Series from pandas.core.categorical import Categorical import pandas.core.algorithms as algos 
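# Aside: the HistPlot._args_adjust hunk above keeps its old behavior: when
# `bins` is an integer, one shared set of edges is computed from all numeric
# values so every column is binned identically. A minimal sketch:
import numpy as np

values = np.array([1.0, np.nan, 2.5, 4.0, 7.5])
values = values[~np.isnan(values)]   # the isnull() filtering step
_, edges = np.histogram(values, bins=3)
print(edges)                         # shared edges, reused for each column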
-import pandas.core.common as com import pandas.core.nanops as nanops -import pandas.lib as lib from pandas.compat import zip import numpy as np @@ -80,7 +82,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 if not np.iterable(bins): - if lib.isscalar(bins) and bins < 1: + if is_scalar(bins) and bins < 1: raise ValueError("`bins` should be a positive integer.") try: # for array-like sz = x.size @@ -164,7 +166,7 @@ def qcut(x, q, labels=None, retbins=False, precision=3): >>> pd.qcut(range(5), 4, labels=False) array([0, 0, 1, 2, 3], dtype=int64) """ - if com.is_integer(q): + if is_integer(q): quantiles = np.linspace(0, 1, q + 1) else: quantiles = q @@ -194,7 +196,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, if include_lowest: ids[x == bins[0]] = 1 - na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0) + na_mask = isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: @@ -264,7 +266,7 @@ def _format_label(x, precision=3): fmt_str = '%%.%dg' % precision if np.isinf(x): return str(x) - elif com.is_float(x): + elif is_float(x): frac, whole = np.modf(x) sgn = '-' if x < 0 else '' whole = abs(whole) diff --git a/pandas/tools/util.py b/pandas/tools/util.py index d70904e1bf286..b8b28663387cc 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -1,6 +1,12 @@ import numpy as np import pandas.lib as lib +from pandas.types.common import (is_number, + is_numeric_dtype, + is_datetime_or_timedelta_dtype, + _ensure_object) +from pandas.types.cast import _possibly_downcast_to_dtype + import pandas as pd from pandas.compat import reduce from pandas.core.index import Index @@ -141,7 +147,7 @@ def to_numeric(arg, errors='raise', downcast=None): elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') elif np.isscalar(arg): - if com.is_number(arg): + if is_number(arg): return arg is_scalar = True values = np.array([arg], dtype='O') @@ -151,14 +157,13 @@ def to_numeric(arg, errors='raise', downcast=None): values = arg try: - if com.is_numeric_dtype(values): + if is_numeric_dtype(values): pass - elif com.is_datetime_or_timedelta_dtype(values): + elif is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: - values = com._ensure_object(values) + values = _ensure_object(values) coerce_numeric = False if errors in ('ignore', 'raise') else True - values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) @@ -168,7 +173,7 @@ def to_numeric(arg, errors='raise', downcast=None): # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified - if downcast is not None and com.is_numeric_dtype(values): + if downcast is not None and is_numeric_dtype(values): typecodes = None if downcast in ('integer', 'signed'): @@ -189,7 +194,7 @@ def to_numeric(arg, errors='raise', downcast=None): # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize < values.dtype.itemsize: - values = com._possibly_downcast_to_dtype( + values = _possibly_downcast_to_dtype( values, dtype) # successful conversion diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 4bafac873ea09..fe0440170383b 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -9,10 +9,16 @@ from pandas.compat.numpy import function as nv import numpy as np - +from pandas.types.common import (is_integer, is_float, + 
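# Aside: the to_numeric downcast loop above walks numpy typecodes from
# smallest to largest itemsize and keeps the first dtype that round-trips.
# A sketch of the idea only; the real code delegates the cast to
# _possibly_downcast_to_dtype:
import numpy as np

values = np.array([1, 2, 3], dtype=np.int64)
for code in np.typecodes['Integer']:          # 'bhilqp': int8 comes first
    if np.dtype(code).itemsize < values.dtype.itemsize:
        cast = values.astype(code)
        if (cast == values).all():            # values survive the round-trip
            values = cast
            break
print(values.dtype)                           # int8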
is_bool_dtype, _ensure_int64, + is_scalar, + is_list_like) +from pandas.types.generic import (ABCIndex, ABCSeries, + ABCPeriodIndex, ABCIndexClass) +from pandas.types.missing import isnull from pandas.core import common as com, algorithms -from pandas.core.common import (is_integer, is_float, is_bool_dtype, - AbstractMethodError) +from pandas.core.common import AbstractMethodError + import pandas.formats.printing as printing import pandas.tslib as tslib import pandas._period as prlib @@ -111,9 +117,9 @@ def _join_i8_wrapper(joinf, dtype, with_indexers=True): @staticmethod def wrapper(left, right): - if isinstance(left, (np.ndarray, com.ABCIndex, com.ABCSeries)): + if isinstance(left, (np.ndarray, ABCIndex, ABCSeries)): left = left.view('i8') - if isinstance(right, (np.ndarray, com.ABCIndex, com.ABCSeries)): + if isinstance(right, (np.ndarray, ABCIndex, ABCSeries)): right = right.view('i8') results = joinf(left, right) if with_indexers: @@ -133,10 +139,10 @@ def _evaluate_compare(self, other, op): # coerce to a similar object if not isinstance(other, type(self)): - if not com.is_list_like(other): + if not is_list_like(other): # scalar other = [other] - elif lib.isscalar(lib.item_from_zerodim(other)): + elif is_scalar(lib.item_from_zerodim(other)): # ndarray scalar other = [other.item()] other = type(self)(other) @@ -174,7 +180,7 @@ def _ensure_localized(self, result): # reconvert to local tz if getattr(self, 'tz', None) is not None: - if not isinstance(result, com.ABCIndexClass): + if not isinstance(result, ABCIndexClass): result = self._simple_new(result) result = result.tz_localize(self.tz) return result @@ -202,7 +208,7 @@ def _format_with_header(self, header, **kwargs): def __contains__(self, key): try: res = self.get_loc(key) - return lib.isscalar(res) or type(res) == slice or np.any(res) + return is_scalar(res) or type(res) == slice or np.any(res) except (KeyError, TypeError, ValueError): return False @@ -213,7 +219,7 @@ def __getitem__(self, key): """ is_int = is_integer(key) - if lib.isscalar(key) and not is_int: + if is_scalar(key) and not is_int: raise ValueError getitem = self._data.__getitem__ @@ -282,7 +288,7 @@ def _nat_new(self, box=True): return result attribs = self._get_attributes_dict() - if not isinstance(self, com.ABCPeriodIndex): + if not isinstance(self, ABCPeriodIndex): attribs['freq'] = None return self._simple_new(result, **attribs) @@ -312,7 +318,7 @@ def sort_values(self, return_indexer=False, ascending=True): attribs = self._get_attributes_dict() freq = attribs['freq'] - if freq is not None and not isinstance(self, com.ABCPeriodIndex): + if freq is not None and not isinstance(self, ABCPeriodIndex): if freq.n > 0 and not ascending: freq = freq * -1 elif freq.n < 0 and ascending: @@ -328,7 +334,7 @@ def sort_values(self, return_indexer=False, ascending=True): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = com._ensure_int64(indices) + indices = _ensure_int64(indices) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) if isinstance(maybe_slice, slice): @@ -340,7 +346,7 @@ def take(self, indices, axis=0, allow_fill=True, na_value=tslib.iNaT) # keep freq in PeriodIndex, reset otherwise - freq = self.freq if isinstance(self, com.ABCPeriodIndex) else None + freq = self.freq if isinstance(self, ABCPeriodIndex) else None return self._shallow_copy(taken, freq=freq) def get_duplicates(self): @@ -545,7 +551,7 @@ def _convert_scalar_indexer(self, key, kind=None): # we don't allow 
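# Aside: the _join_i8_wrapper change above is purely an import swap; the
# underlying trick is viewing datetime-like data as int64 nanoseconds so the
# join kernels operate on plain integers:
import numpy as np

stamps = np.array(['2016-01-01', '2016-01-02'], dtype='datetime64[ns]')
print(stamps.view('i8'))  # nanoseconds since the epoch, joinable as ints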
integer/float indexing for loc # we don't allow float indexing for ix/getitem - if lib.isscalar(key): + if is_scalar(key): is_int = is_integer(key) is_flt = is_float(key) if kind in ['loc'] and (is_int or is_flt): @@ -591,7 +597,7 @@ def __add__(self, other): elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): return self._add_delta(other) - elif com.is_integer(other): + elif is_integer(other): return self.shift(other) elif isinstance(other, (tslib.Timestamp, datetime)): return self._add_datelike(other) @@ -619,7 +625,7 @@ def __sub__(self, other): elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): return self._add_delta(-other) - elif com.is_integer(other): + elif is_integer(other): return self.shift(-other) elif isinstance(other, (tslib.Timestamp, datetime)): return self._sub_datelike(other) @@ -791,9 +797,9 @@ def summary(self, name=None): def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ - if lib.isscalar(other) and com.isnull(other): + if lib.isscalar(other) and isnull(other): other = tslib.iNaT - elif isinstance(other, com.ABCIndexClass): + elif isinstance(other, ABCIndexClass): # convert tz if needed if getattr(other, 'tz', None) is not None: diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 8937e83c7009a..46e8bd43e8ff8 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -3,19 +3,21 @@ """ import numpy as np + +from pandas.types.common import (_NS_DTYPE, _TD_DTYPE, + is_period_arraylike, + is_datetime_arraylike, is_integer_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, + is_timedelta64_dtype, is_categorical_dtype, + is_list_like) + from pandas.core.base import PandasDelegate, NoNewAttributesMixin -from pandas.core import common as com from pandas.tseries.index import DatetimeIndex from pandas._period import IncompatibleFrequency # flake8: noqa from pandas.tseries.period import PeriodIndex from pandas.tseries.tdi import TimedeltaIndex from pandas import tslib from pandas.core.algorithms import take_1d -from pandas.core.common import (_NS_DTYPE, _TD_DTYPE, is_period_arraylike, - is_datetime_arraylike, is_integer_dtype, - is_list_like, - is_datetime64_dtype, is_datetime64tz_dtype, - is_timedelta64_dtype, is_categorical_dtype) def is_datetimelike(data): @@ -129,7 +131,7 @@ def _delegate_method(self, name, *args, **kwargs): method = getattr(self.values, name) result = method(*args, **kwargs) - if not com.is_list_like(result): + if not is_list_like(result): return result result = Series(result, index=self.index, name=self.name) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 78b185ae8cf31..fc23f4f99449b 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -10,6 +10,14 @@ from matplotlib.ticker import Formatter, AutoLocator, Locator from matplotlib.transforms import nonsingular + +from pandas.types.common import (is_float, is_integer, + is_integer_dtype, + is_float_dtype, + is_datetime64_ns_dtype, + is_period_arraylike, + ) + from pandas.compat import lrange import pandas.compat as compat import pandas.lib as lib @@ -73,8 +81,8 @@ class TimeConverter(units.ConversionInterface): @staticmethod def convert(value, unit, axis): valid_types = (str, pydt.time) - if (isinstance(value, valid_types) or com.is_integer(value) or - com.is_float(value)): + if (isinstance(value, valid_types) or is_integer(value) or + is_float(value)): return time2num(value) if isinstance(value, Index): return 
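# Aside: the __add__/__sub__ branches above route integers to .shift(), a
# move by n steps of the index frequency (in this 0.19-era code, `idx + n`
# behaves the same way):
import pandas as pd

idx = pd.date_range('2016-01-01', periods=3, freq='D')
print(idx.shift(1))  # every timestamp moves forward one day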
value.map(time2num) @@ -129,14 +137,14 @@ def convert(values, units, axis): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (compat.string_types, datetime, Period, pydt.date, pydt.time) - if (isinstance(values, valid_types) or com.is_integer(values) or - com.is_float(values)): + if (isinstance(values, valid_types) or is_integer(values) or + is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): return values.asfreq(axis.freq).values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) - if com.is_period_arraylike(values): + if is_period_arraylike(values): return PeriodIndex(values, freq=axis.freq).values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] @@ -149,7 +157,7 @@ def get_datevalue(date, freq): elif isinstance(date, (compat.string_types, datetime, pydt.date, pydt.time)): return Period(date, freq).ordinal - elif (com.is_integer(date) or com.is_float(date) or + elif (is_integer(date) or is_float(date) or (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): return date elif date is None: @@ -163,8 +171,8 @@ def _dt_to_float_ordinal(dt): preserving hours, minutes, seconds and microseconds. Return value is a :func:`float`. """ - if (isinstance(dt, (np.ndarray, Index, Series)) and - com.is_datetime64_ns_dtype(dt)): + if (isinstance(dt, (np.ndarray, Index, Series) + ) and is_datetime64_ns_dtype(dt)): base = dates.epoch2num(dt.asi8 / 1.0E9) else: base = dates.date2num(dt) @@ -188,7 +196,7 @@ def try_parse(values): return _dt_to_float_ordinal(lib.Timestamp(values)) elif isinstance(values, pydt.time): return dates.date2num(values) - elif (com.is_integer(values) or com.is_float(values)): + elif (is_integer(values) or is_float(values)): return values elif isinstance(values, compat.string_types): return try_parse(values) @@ -198,7 +206,7 @@ def try_parse(values): if not isinstance(values, np.ndarray): values = com._asarray_tuplesafe(values) - if com.is_integer_dtype(values) or com.is_float_dtype(values): + if is_integer_dtype(values) or is_float_dtype(values): return values try: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 3f1d0c6d969a6..e2132deb97d64 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -6,12 +6,17 @@ import numpy as np +from pandas.types.generic import ABCSeries +from pandas.types.common import (is_integer, + is_period_arraylike, + is_timedelta64_dtype, + is_datetime64_dtype) + import pandas.core.algorithms as algos from pandas.core.algorithms import unique from pandas.tseries.offsets import DateOffset from pandas.util.decorators import cache_readonly import pandas.tseries.offsets as offsets -import pandas.core.common as com import pandas.lib as lib import pandas.tslib as tslib from pandas.tslib import Timedelta @@ -255,8 +260,8 @@ def get_freq_code(freqstr): freqstr = (freqstr.rule_code, freqstr.n) if isinstance(freqstr, tuple): - if (com.is_integer(freqstr[0]) and - com.is_integer(freqstr[1])): + if (is_integer(freqstr[0]) and + is_integer(freqstr[1])): # e.g., freqstr = (2000, 1) return freqstr else: @@ -265,13 +270,13 @@ def get_freq_code(freqstr): code = _period_str_to_code(freqstr[0]) stride = freqstr[1] except: - if com.is_integer(freqstr[1]): + if is_integer(freqstr[1]): raise code = _period_str_to_code(freqstr[1]) stride = freqstr[0] return code, stride - if com.is_integer(freqstr): + if is_integer(freqstr): return (freqstr, 1) 
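# Aside: the converter hunks above all reduce to handing matplotlib plain
# numbers; for period data that number is the Period's integer ordinal:
import pandas as pd

p = pd.Period('2016-01', freq='M')
print(p.ordinal)  # months elapsed since 1970-01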
base, stride = _base_and_stride(freqstr) @@ -843,16 +848,16 @@ def infer_freq(index, warn=True): """ import pandas as pd - if isinstance(index, com.ABCSeries): + if isinstance(index, ABCSeries): values = index._values - if not (com.is_datetime64_dtype(values) or - com.is_timedelta64_dtype(values) or + if not (is_datetime64_dtype(values) or + is_timedelta64_dtype(values) or values.dtype == object): raise TypeError("cannot infer freq from a non-convertible " "dtype on a Series of {0}".format(index.dtype)) index = values - if com.is_period_arraylike(index): + if is_period_arraylike(index): raise TypeError("PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq.") elif isinstance(index, pd.TimedeltaIndex): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 9b36bc5907066..47bb69b8d7ad6 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -6,13 +6,25 @@ from datetime import timedelta import numpy as np from pandas.core.base import _shared_docs -from pandas.core.common import (_INT64_DTYPE, _NS_DTYPE, _maybe_box, - _values_from_object, ABCSeries, - DatetimeTZDtype, PerformanceWarning, - is_datetimetz, is_datetime64_dtype, - is_datetime64_ns_dtype, is_dtype_equal, - is_float, is_integer, is_integer_dtype, - is_object_dtype, is_string_dtype) + +from pandas.types.common import (_NS_DTYPE, _INT64_DTYPE, + is_object_dtype, is_datetime64_dtype, + is_datetimetz, is_dtype_equal, + is_integer, is_float, + is_integer_dtype, + is_datetime64_ns_dtype, + is_bool_dtype, + is_string_dtype, + is_list_like, + is_scalar, + _ensure_int64) +from pandas.types.generic import ABCSeries +from pandas.types.dtypes import DatetimeTZDtype +from pandas.types.missing import isnull + +import pandas.types.concat as _concat +from pandas.core.common import (_values_from_object, _maybe_box, + PerformanceWarning) from pandas.core.index import Index, Int64Index, Float64Index from pandas.indexes.base import _index_shared_docs @@ -27,7 +39,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) import pandas.core.common as com -import pandas.types.concat as _concat import pandas.tseries.offsets as offsets import pandas.tseries.tools as tools @@ -87,7 +98,7 @@ def wrapper(self, other): isinstance(other, compat.string_types)): other = _to_m8(other, tz=self.tz) result = func(other) - if com.isnull(other): + if isnull(other): result.fill(nat_result) else: if isinstance(other, list): @@ -109,7 +120,7 @@ def wrapper(self, other): result[self._isnan] = nat_result # support of bool dtype indexers - if com.is_bool_dtype(result): + if is_bool_dtype(result): return result return Index(result) @@ -277,7 +288,7 @@ def __new__(cls, data=None, ambiguous=ambiguous) if not isinstance(data, (np.ndarray, Index, ABCSeries)): - if lib.isscalar(data): + if is_scalar(data): raise ValueError('DatetimeIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) @@ -537,7 +548,7 @@ def _generate(cls, start, end, periods, name, offset, index = _generate_regular_range(start, end, periods, offset) if tz is not None and getattr(index, 'tz', None) is None: - index = tslib.tz_localize_to_utc(com._ensure_int64(index), tz, + index = tslib.tz_localize_to_utc(_ensure_int64(index), tz, ambiguous=ambiguous) index = index.view(_NS_DTYPE) @@ -601,7 +612,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, return cls(values, name=name, freq=freq, tz=tz, dtype=dtype, **kwargs).values elif not is_datetime64_dtype(values): - values = 
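# Aside: infer_freq, whose dtype checks were just rewired above, recovers a
# frequency rule from regularly spaced values:
import pandas as pd

idx = pd.date_range('2016-01-01', periods=5, freq='D')
print(pd.infer_freq(idx))  # 'D'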
com._ensure_int64(values).view(_NS_DTYPE) + values = _ensure_int64(values).view(_NS_DTYPE) result = object.__new__(cls) result._data = values @@ -1683,7 +1694,7 @@ def inferred_type(self): def dtype(self): if self.tz is None: return _NS_DTYPE - return com.DatetimeTZDtype('ns', self.tz) + return DatetimeTZDtype('ns', self.tz) @property def is_all_dates(self): @@ -1787,9 +1798,9 @@ def delete(self, loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: - if com.is_list_like(loc): + if is_list_like(loc): loc = lib.maybe_indices_to_slice( - com._ensure_int64(np.array(loc)), len(self)) + _ensure_int64(np.array(loc)), len(self)) if isinstance(loc, slice) and loc.step in (1, None): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index d0b1fd746d0d5..f12ba8083f545 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -3,9 +3,9 @@ from pandas import compat import numpy as np +from pandas.types.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod from pandas.tseries.tools import to_datetime, normalize_date -from pandas.core.common import (ABCSeries, ABCDatetimeIndex, ABCPeriod, - AbstractMethodError) +from pandas.core.common import AbstractMethodError # import after tools, dateutil check from dateutil.relativedelta import relativedelta, weekday diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 750e7a5553ef6..45f634050a5d8 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -1,6 +1,24 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime, timedelta import numpy as np + + +from pandas.core import common as com +from pandas.types.common import (is_integer, + is_float, + is_object_dtype, + is_integer_dtype, + is_float_dtype, + is_scalar, + is_timedelta64_dtype, + is_bool_dtype, + _ensure_int64, + _ensure_object) + +from pandas.types.generic import ABCSeries +from pandas.types.missing import isnull + + import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.tseries.index import DatetimeIndex, Int64Index, Index @@ -17,15 +35,10 @@ from pandas.core.base import _shared_docs from pandas.indexes.base import _index_shared_docs -import pandas.core.common as com -from pandas.core.common import ( - _maybe_box, _values_from_object, ABCSeries, is_float, is_integer, - is_integer_dtype, is_object_dtype, isnull) from pandas import compat from pandas.compat.numpy import function as nv from pandas.util.decorators import Appender, cache_readonly, Substitution from pandas.lib import Timedelta -import pandas.lib as lib import pandas.tslib as tslib import pandas.core.missing as missing from pandas.compat import zip, u @@ -209,7 +222,7 @@ def _generate_range(cls, start, end, periods, freq, fields): def _from_arraylike(cls, data, freq, tz): if not isinstance(data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): - if lib.isscalar(data) or isinstance(data, Period): + if is_scalar(data) or isinstance(data, Period): raise ValueError('PeriodIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) @@ -219,13 +232,13 @@ def _from_arraylike(cls, data, freq, tz): data = list(data) try: - data = com._ensure_int64(data) + data = _ensure_int64(data) if freq is None: raise ValueError('freq not specified') data = np.array([Period(x, freq=freq).ordinal for x in data], dtype=np.int64) except (TypeError, ValueError): - data = com._ensure_object(data) 
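# Aside: the DatetimeIndex.delete() hunk above keeps `freq` only when the
# remaining stamps are still evenly spaced:
import pandas as pd

idx = pd.date_range('2016-01-01', periods=4, freq='D')
print(idx.delete(0).freq)  # <Day>: trimming an endpoint stays regular
print(idx.delete(1).freq)  # None: an interior gap breaks the frequency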
+ data = _ensure_object(data) if freq is None: freq = period.extract_freq(data) @@ -242,7 +255,7 @@ def _from_arraylike(cls, data, freq, tz): base1, base2, 1) else: - if freq is None and com.is_object_dtype(data): + if freq is None and is_object_dtype(data): # must contain Period instance and thus extract ordinals freq = period.extract_freq(data) data = period.extract_ordinals(data, freq) @@ -256,9 +269,9 @@ def _from_arraylike(cls, data, freq, tz): data = dt64arr_to_periodarr(data, freq, tz) else: try: - data = com._ensure_int64(data) + data = _ensure_int64(data) except (TypeError, ValueError): - data = com._ensure_object(data) + data = _ensure_object(data) data = period.extract_ordinals(data, freq) return data, freq @@ -266,9 +279,9 @@ def _from_arraylike(cls, data, freq, tz): @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): - if not com.is_integer_dtype(values): + if not is_integer_dtype(values): values = np.array(values, copy=False) - if (len(values) > 0 and com.is_float_dtype(values)): + if (len(values) > 0 and is_float_dtype(values)): raise TypeError("PeriodIndex can't take floats") else: return PeriodIndex(values, name=name, freq=freq, **kwargs) @@ -339,7 +352,7 @@ def __array_wrap__(self, result, context=None): # from here because numpy catches. raise ValueError(msg.format(func.__name__)) - if com.is_bool_dtype(result): + if is_bool_dtype(result): return result return PeriodIndex(result, freq=self.freq, name=self.name) @@ -580,9 +593,9 @@ def _maybe_convert_timedelta(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) elif isinstance(other, np.ndarray): - if com.is_integer_dtype(other): + if is_integer_dtype(other): return other - elif com.is_timedelta64_dtype(other): + elif is_timedelta64_dtype(other): offset = frequencies.to_offset(self.freq) if isinstance(offset, offsets.Tick): nanos = tslib._delta_to_nanoseconds(other) @@ -657,10 +670,11 @@ def get_value(self, series, key): Fast lookup of value from 1-dimensional ndarray. 
Only use this if you know what you're doing """ - s = _values_from_object(series) + s = com._values_from_object(series) try: - return _maybe_box(self, super(PeriodIndex, self).get_value(s, key), - series, key) + return com._maybe_box(self, + super(PeriodIndex, self).get_value(s, key), + series, key) except (KeyError, IndexError): try: asdt, parsed, reso = parse_time_string(key, self.freq) @@ -683,16 +697,16 @@ def get_value(self, series, key): return series[key] elif grp == freqn: key = Period(asdt, freq=self.freq).ordinal - return _maybe_box(self, self._engine.get_value(s, key), - series, key) + return com._maybe_box(self, self._engine.get_value(s, key), + series, key) else: raise KeyError(key) except TypeError: pass key = Period(key, self.freq).ordinal - return _maybe_box(self, self._engine.get_value(s, key), - series, key) + return com._maybe_box(self, self._engine.get_value(s, key), + series, key) def get_indexer(self, target, method=None, limit=None, tolerance=None): if hasattr(target, 'freq') and target.freq != self.freq: @@ -849,7 +863,7 @@ def _apply_meta(self, rawarr): def __getitem__(self, key): getitem = self._data.__getitem__ - if lib.isscalar(key): + if is_scalar(key): val = getitem(key) return Period(ordinal=val, freq=self.freq) else: diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index dbc0078b67ae7..f9fb51ebf710c 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -2,11 +2,20 @@ from datetime import timedelta import numpy as np -from pandas.core.common import (ABCSeries, _TD_DTYPE, _maybe_box, - _values_from_object, isnull, - is_integer, is_float, is_integer_dtype, - is_object_dtype, is_timedelta64_dtype, - is_timedelta64_ns_dtype) +from pandas.types.common import (_TD_DTYPE, + is_integer, is_float, + is_bool_dtype, + is_list_like, + is_scalar, + is_integer_dtype, + is_object_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + _ensure_int64) +from pandas.types.missing import isnull +from pandas.types.generic import ABCSeries +from pandas.core.common import _maybe_box, _values_from_object + from pandas.core.index import Index, Int64Index import pandas.compat as compat from pandas.compat import u @@ -44,10 +53,10 @@ def wrapper(self, other): # failed to parse as timedelta raise TypeError(msg.format(type(other))) result = func(other) - if com.isnull(other): + if isnull(other): result.fill(nat_result) else: - if not com.is_list_like(other): + if not is_list_like(other): raise TypeError(msg.format(type(other))) other = TimedeltaIndex(other).values @@ -66,7 +75,7 @@ def wrapper(self, other): result[self._isnan] = nat_result # support of bool dtype indexers - if com.is_bool_dtype(result): + if is_bool_dtype(result): return result return Index(result) @@ -175,7 +184,7 @@ def __new__(cls, data=None, unit=None, data = to_timedelta(data, unit=unit, box=False) if not isinstance(data, (np.ndarray, Index, ABCSeries)): - if lib.isscalar(data): + if is_scalar(data): raise ValueError('TimedeltaIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) @@ -261,7 +270,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): if values.dtype == np.object_: values = tslib.array_to_timedelta64(values) if values.dtype != _TD_DTYPE: - values = com._ensure_int64(values).view(_TD_DTYPE) + values = _ensure_int64(values).view(_TD_DTYPE) result = object.__new__(cls) result._data = values @@ -905,9 +914,9 @@ def delete(self, loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: - if com.is_list_like(loc): + if 
is_list_like(loc): loc = lib.maybe_indices_to_slice( - com._ensure_int64(np.array(loc)), len(self)) + _ensure_int64(np.array(loc)), len(self)) if isinstance(loc, slice) and loc.step in (1, None): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq diff --git a/pandas/tseries/tests/test_bin_groupby.py b/pandas/tseries/tests/test_bin_groupby.py index 6b6c468b7c391..08c0833be0cd6 100644 --- a/pandas/tseries/tests/test_bin_groupby.py +++ b/pandas/tseries/tests/test_bin_groupby.py @@ -3,12 +3,12 @@ from numpy import nan import numpy as np +from pandas.types.common import _ensure_int64 from pandas import Index, isnull from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm import pandas.lib as lib import pandas.algos as algos -from pandas.core import common as com def test_series_grouper(): @@ -90,8 +90,8 @@ def _check(dtype): bins = np.array([6, 12, 20]) out = np.zeros((3, 4), dtype) counts = np.zeros(len(out), dtype=np.int64) - labels = com._ensure_int64(np.repeat(np.arange(3), - np.diff(np.r_[0, bins]))) + labels = _ensure_int64(np.repeat(np.arange(3), + np.diff(np.r_[0, bins]))) func = getattr(algos, 'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], labels) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 807fb86b1b4da..591fa19aad585 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -4326,10 +4326,10 @@ def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='period[D]') val = series[3] - self.assertTrue(com.isnull(val)) + self.assertTrue(isnull(val)) series[2] = val - self.assertTrue(com.isnull(series[2])) + self.assertTrue(isnull(series[2])) def test_NaT_cast(self): result = Series([np.nan]).astype('period[D]') diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 2236d20975eee..518f69485004c 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -11,10 +11,11 @@ import pandas.util.testing as tm from pandas import (Series, DataFrame, Panel, Index, isnull, notnull, Timestamp) + +from pandas.types.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict from pandas.core.base import SpecificationError -from pandas.core.common import (ABCSeries, ABCDataFrame, - UnsupportedFunctionCall) +from pandas.core.common import UnsupportedFunctionCall from pandas.core.groupby import DataError from pandas.tseries.frequencies import MONTHS, DAYS from pandas.tseries.frequencies import to_offset diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index e594d31e57296..299ec374567e7 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -12,6 +12,7 @@ import pandas.lib as lib import pandas.tslib as tslib +from pandas.types.common import is_datetime64_ns_dtype import pandas as pd import pandas.compat as compat import pandas.core.common as com @@ -2282,7 +2283,7 @@ def test_to_datetime_tz_psycopg2(self): i = pd.DatetimeIndex([ '2000-01-01 08:00:00+00:00' ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) - self.assertFalse(com.is_datetime64_ns_dtype(i)) + self.assertFalse(is_datetime64_ns_dtype(i)) # tz coerceion result = pd.to_datetime(i, errors='coerce') diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 71a041d5139a2..470aafafec547 100644 --- 
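# Aside: the label construction in the bin-groupby test above, spelled out.
# np.diff over the bin edges gives each group's size, and np.repeat expands
# that into one int64 label per input row:
import numpy as np

bins = np.array([6, 12, 20])
sizes = np.diff(np.r_[0, bins])          # [6 6 8]
labels = np.repeat(np.arange(3), sizes)  # six 0s, six 1s, eight 2s
print(labels.shape)                      # (20,): one group label per row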
a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -5,6 +5,7 @@ import numpy as np import pytz +from pandas.types.dtypes import DatetimeTZDtype from pandas import (Index, Series, DataFrame, isnull, Timestamp) from pandas import DatetimeIndex, to_datetime, NaT @@ -17,7 +18,6 @@ from pytz import NonExistentTimeError import pandas.util.testing as tm -from pandas.types.api import DatetimeTZDtype from pandas.util.testing import assert_frame_equal, set_timezone from pandas.compat import lrange, zip diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 5a28218500858..7f28ec86ec40d 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -4,9 +4,11 @@ import numpy as np import pandas.tslib as tslib -from pandas.core.common import (ABCSeries, is_integer_dtype, - is_timedelta64_dtype, is_list_like, - _ensure_object, ABCIndexClass) +from pandas.types.common import (_ensure_object, + is_integer_dtype, + is_timedelta64_dtype, + is_list_like) +from pandas.types.generic import ABCSeries, ABCIndexClass from pandas.util.decorators import deprecate_kwarg diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index efb8590dfccf4..067e8ec19f644 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -4,8 +4,17 @@ import pandas.lib as lib import pandas.tslib as tslib -import pandas.core.common as com -from pandas.core.common import ABCIndexClass, ABCSeries, ABCDataFrame + +from pandas.types.common import (_ensure_object, + is_datetime64_ns_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_list_like) +from pandas.types.generic import (ABCIndexClass, ABCSeries, + ABCDataFrame) +from pandas.types.missing import notnull + import pandas.compat as compat from pandas.util.decorators import deprecate_kwarg @@ -161,7 +170,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, def _guess_datetime_format_for_array(arr, **kwargs): # Try to guess the format based on the first non-NaN element - non_nan_elements = com.notnull(arr).nonzero()[0] + non_nan_elements = notnull(arr).nonzero()[0] if len(non_nan_elements): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) @@ -307,7 +316,7 @@ def _convert_listlike(arg, box, format, name=None): arg = np.array(arg, dtype='O') # these are shortcutable - if com.is_datetime64_ns_dtype(arg): + if is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz='utc' if utc else None, @@ -317,7 +326,7 @@ def _convert_listlike(arg, box, format, name=None): return arg - elif com.is_datetime64tz_dtype(arg): + elif is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz='utc' if utc else None) if utc: @@ -342,7 +351,7 @@ def _convert_listlike(arg, box, format, name=None): raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - arg = com._ensure_object(arg) + arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: @@ -399,7 +408,7 @@ def _convert_listlike(arg, box, format, name=None): require_iso8601=require_iso8601 ) - if com.is_datetime64_dtype(result) and box: + if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz='utc' if utc else None, name=name) @@ -424,7 +433,7 @@ def _convert_listlike(arg, box, format, name=None): return _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): return _convert_listlike(arg, box, format, 
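# Aside: _guess_datetime_format_for_array (rewired above) samples the first
# non-null element and guesses the datetime format from it alone:
import numpy as np
from pandas import notnull

arr = np.array([np.nan, '2016-01-02', np.nan], dtype=object)
first = notnull(arr).nonzero()[0][0]
print(arr[first])  # '2016-01-02': the element the format is guessed from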
name=arg.name) - elif com.is_list_like(arg): + elif is_list_like(arg): return _convert_listlike(arg, box, format) return _convert_listlike(np.array([arg]), box, format)[0] @@ -511,7 +520,7 @@ def coerce(values): values = to_numeric(values, errors=errors) # prevent overflow in case of int8 or int16 - if com.is_integer_dtype(values): + if is_integer_dtype(values): values = values.astype('int64', copy=False) return values @@ -574,7 +583,7 @@ def calc_with_mask(carg, mask): # a float with actual np.nan try: carg = arg.astype(np.float64) - return calc_with_mask(carg, com.notnull(carg)) + return calc_with_mask(carg, notnull(carg)) except: pass @@ -654,7 +663,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): def _guess_time_format_for_array(arr): # Try to guess the format based on the first non-NaN element - non_nan_elements = com.notnull(arr).nonzero()[0] + non_nan_elements = notnull(arr).nonzero()[0] if len(non_nan_elements): element = arr[non_nan_elements[0]] for time_format in _time_formats: @@ -705,7 +714,7 @@ def _convert_listlike(arg, format): raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - arg = com._ensure_object(arg) + arg = _ensure_object(arg) if infer_time_format and format is None: format = _guess_time_format_for_array(arg) @@ -762,7 +771,7 @@ def _convert_listlike(arg, format): return Series(values, index=arg.index, name=arg.name) elif isinstance(arg, ABCIndexClass): return _convert_listlike(arg, format) - elif com.is_list_like(arg): + elif is_list_like(arg): return _convert_listlike(arg, format) return _convert_listlike(np.array([arg]), format)[0] diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index 7e314657cb25c..98a93d22b09a6 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -1,6 +1,6 @@ from pandas.compat import lrange import numpy as np -import pandas.core.common as com +from pandas.types.common import _ensure_platform_int from pandas.core.frame import DataFrame import pandas.core.nanops as nanops @@ -69,7 +69,7 @@ def pivot_annual(series, freq=None): raise NotImplementedError(freq) flat_index = (year - years.min()) * width + offset - flat_index = com._ensure_platform_int(flat_index) + flat_index = _ensure_platform_int(flat_index) values = np.empty((len(years), width)) values.fill(np.nan) diff --git a/pandas/types/api.py b/pandas/types/api.py index 721d8d29bba8b..2d68e041f632e 100644 --- a/pandas/types/api.py +++ b/pandas/types/api.py @@ -1,75 +1,54 @@ # flake8: noqa import numpy as np -from pandas.compat import string_types -from .dtypes import (CategoricalDtype, CategoricalDtypeType, - DatetimeTZDtype, DatetimeTZDtypeType) -from .generic import (ABCIndex, ABCInt64Index, ABCRangeIndex, - ABCFloat64Index, ABCMultiIndex, - ABCDatetimeIndex, - ABCTimedeltaIndex, ABCPeriodIndex, - ABCCategoricalIndex, - ABCIndexClass, - ABCSeries, ABCDataFrame, ABCPanel, - ABCSparseSeries, ABCSparseArray, - ABCCategorical, ABCPeriod, - ABCGeneric) - -def pandas_dtype(dtype): - """ - Converts input into a pandas only dtype object or a numpy dtype object. 
- - Parameters - ---------- - dtype : object to be converted - - Returns - ------- - np.dtype or a pandas dtype - """ - if isinstance(dtype, DatetimeTZDtype): - return dtype - elif isinstance(dtype, CategoricalDtype): - return dtype - elif isinstance(dtype, string_types): - try: - return DatetimeTZDtype.construct_from_string(dtype) - except TypeError: - pass - - try: - return CategoricalDtype.construct_from_string(dtype) - except TypeError: - pass - - return np.dtype(dtype) - -def na_value_for_dtype(dtype): - """ - Return a dtype compat na value - - Parameters - ---------- - dtype : string / dtype - - Returns - ------- - dtype compat na value - """ - - from pandas.core import common as com - from pandas import NaT - dtype = pandas_dtype(dtype) - - if (com.is_datetime64_dtype(dtype) or - com.is_datetime64tz_dtype(dtype) or - com.is_timedelta64_dtype(dtype)): - return NaT - elif com.is_float_dtype(dtype): - return np.nan - elif com.is_integer_dtype(dtype): - return 0 - elif com.is_bool_dtype(dtype): - return False - return np.nan +from .common import (pandas_dtype, + is_dtype_equal, + is_extension_type, + + # categorical + is_categorical, + is_categorical_dtype, + + # datetimelike + is_datetimetz, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_datetime64_any_dtype, + is_datetime64_ns_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + + # string-like + is_string_dtype, + is_object_dtype, + + # sparse + is_sparse, + + # numeric types + is_scalar, + is_sparse, + is_bool, + is_integer, + is_float, + is_complex, + is_number, + is_any_int_dtype, + is_integer_dtype, + is_int64_dtype, + is_numeric_dtype, + is_float_dtype, + is_floating_dtype, + is_bool_dtype, + is_complex_dtype, + + # like + is_re, + is_re_compilable, + is_dict_like, + is_iterator, + is_list_like, + is_hashable, + is_named_tuple, + is_sequence) diff --git a/pandas/types/cast.py b/pandas/types/cast.py new file mode 100644 index 0000000000000..e55cb91d36430 --- /dev/null +++ b/pandas/types/cast.py @@ -0,0 +1,860 @@ +""" routings for casting """ + +from datetime import datetime, timedelta +import numpy as np +from pandas import lib, tslib +from pandas.tslib import iNaT +from pandas.compat import string_types, text_type, PY3 +from .common import (_ensure_object, is_bool, is_integer, is_float, + is_complex, is_datetimetz, is_categorical_dtype, + is_extension_type, is_object_dtype, + is_datetime64tz_dtype, is_datetime64_dtype, + is_timedelta64_dtype, is_dtype_equal, + is_float_dtype, is_complex_dtype, + is_integer_dtype, is_datetime_or_timedelta_dtype, + is_scalar, + _string_dtypes, + _coerce_to_dtype, + _ensure_int8, _ensure_int16, + _ensure_int32, _ensure_int64, + _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE, + _DATELIKE_DTYPES, _POSSIBLY_CAST_DTYPES) +from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries +from .missing import isnull, notnull +from .inference import is_list_like + +_int8_max = np.iinfo(np.int8).max +_int16_max = np.iinfo(np.int16).max +_int32_max = np.iinfo(np.int32).max +_int64_max = np.iinfo(np.int64).max + + +def _possibly_convert_platform(values): + """ try to do platform conversion, allow ndarray or list here """ + + if isinstance(values, (list, tuple)): + values = lib.list_to_object_array(values) + if getattr(values, 'dtype', None) == np.object_: + if hasattr(values, '_values'): + values = values._values + values = lib.maybe_convert_objects(values) + + return values + + +def _possibly_downcast_to_dtype(result, dtype): + """ try to cast to the specified dtype (e.g. 
convert back to bool/int + or could be an astype of float64->float32 + """ + + if is_scalar(result): + return result + + def trans(x): + return x + + if isinstance(dtype, string_types): + if dtype == 'infer': + inferred_type = lib.infer_dtype(_ensure_object(result.ravel())) + if inferred_type == 'boolean': + dtype = 'bool' + elif inferred_type == 'integer': + dtype = 'int64' + elif inferred_type == 'datetime64': + dtype = 'datetime64[ns]' + elif inferred_type == 'timedelta64': + dtype = 'timedelta64[ns]' + + # try to upcast here + elif inferred_type == 'floating': + dtype = 'int64' + if issubclass(result.dtype.type, np.number): + + def trans(x): # noqa + return x.round() + else: + dtype = 'object' + + if isinstance(dtype, string_types): + dtype = np.dtype(dtype) + + try: + + # don't allow upcasts here (except if empty) + if dtype.kind == result.dtype.kind: + if (result.dtype.itemsize <= dtype.itemsize and + np.prod(result.shape)): + return result + + if issubclass(dtype.type, np.floating): + return result.astype(dtype) + elif dtype == np.bool_ or issubclass(dtype.type, np.integer): + + # if we don't have any elements, just astype it + if not np.prod(result.shape): + return trans(result).astype(dtype) + + # do a test on the first element, if it fails then we are done + r = result.ravel() + arr = np.array([r[0]]) + + # if we have any nulls, then we are done + if isnull(arr).any() or not np.allclose(arr, + trans(arr).astype(dtype)): + return result + + # a comparable, e.g. a Decimal may slip in here + elif not isinstance(r[0], (np.integer, np.floating, np.bool, int, + float, bool)): + return result + + if (issubclass(result.dtype.type, (np.object_, np.number)) and + notnull(result).all()): + new_result = trans(result).astype(dtype) + try: + if np.allclose(new_result, result): + return new_result + except: + + # comparison of an object dtype with a number type could + # hit here + if (new_result == result).all(): + return new_result + + # a datetimelike + elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i']: + try: + result = result.astype(dtype) + except: + if dtype.tz: + # convert to datetime and change timezone + from pandas import to_datetime + result = to_datetime(result).tz_localize(dtype.tz) + + except: + pass + + return result + + +def _maybe_upcast_putmask(result, mask, other): + """ + A safe version of putmask that potentially upcasts the result + + Parameters + ---------- + result : ndarray + The destination array. This will be mutated in-place if no upcasting is + necessary. 
+ mask : boolean ndarray + other : ndarray or scalar + The source array or value + + Returns + ------- + result : ndarray + changed : boolean + Set to true if the result array was upcasted + """ + + if mask.any(): + # Two conversions for date-like dtypes that can't be done automatically + # in np.place: + # NaN -> NaT + # integer or integer array -> date-like array + if result.dtype in _DATELIKE_DTYPES: + if is_scalar(other): + if isnull(other): + other = result.dtype.type('nat') + elif is_integer(other): + other = np.array(other, dtype=result.dtype) + elif is_integer_dtype(other): + other = np.array(other, dtype=result.dtype) + + def changeit(): + + # try to directly set by expanding our array to full + # length of the boolean + try: + om = other[mask] + om_at = om.astype(result.dtype) + if (om == om_at).all(): + new_result = result.values.copy() + new_result[mask] = om_at + result[:] = new_result + return result, False + except: + pass + + # we are forced to change the dtype of the result as the input + # isn't compatible + r, _ = _maybe_upcast(result, fill_value=other, copy=True) + np.place(r, mask, other) + + return r, True + + # we want to decide whether place will work + # if we have nans in the False portion of our mask then we need to + # upcast (possibly), otherwise we DON't want to upcast (e.g. if we + # have values, say integers, in the success portion then it's ok to not + # upcast) + new_dtype, _ = _maybe_promote(result.dtype, other) + if new_dtype != result.dtype: + + # we have a scalar or len 0 ndarray + # and its nan and we are changing some values + if (is_scalar(other) or + (isinstance(other, np.ndarray) and other.ndim < 1)): + if isnull(other): + return changeit() + + # we have an ndarray and the masking has nans in it + else: + + if isnull(other[mask]).any(): + return changeit() + + try: + np.place(result, mask, other) + except: + return changeit() + + return result, False + + +def _maybe_promote(dtype, fill_value=np.nan): + + # if we passed an array here, determine the fill value by dtype + if isinstance(fill_value, np.ndarray): + if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): + fill_value = iNaT + else: + + # we need to change to object type as our + # fill_value is of object type + if fill_value.dtype == np.object_: + dtype = np.dtype(np.object_) + fill_value = np.nan + + # returns tuple of (dtype, fill_value) + if issubclass(dtype.type, (np.datetime64, np.timedelta64)): + # for now: refuse to upcast datetime64 + # (this is because datetime64 will not implicitly upconvert + # to object correctly as of numpy 1.6.1) + if isnull(fill_value): + fill_value = iNaT + else: + if issubclass(dtype.type, np.datetime64): + try: + fill_value = lib.Timestamp(fill_value).value + except: + # the proper thing to do here would probably be to upcast + # to object (but numpy 1.6.1 doesn't do this properly) + fill_value = iNaT + elif issubclass(dtype.type, np.timedelta64): + try: + fill_value = lib.Timedelta(fill_value).value + except: + # as for datetimes, cannot upcast to object + fill_value = iNaT + else: + fill_value = iNaT + elif is_datetimetz(dtype): + if isnull(fill_value): + fill_value = iNaT + elif is_float(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.object_ + elif issubclass(dtype.type, np.integer): + dtype = np.float64 + elif is_bool(fill_value): + if not issubclass(dtype.type, np.bool_): + dtype = np.object_ + elif is_integer(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.object_ + elif issubclass(dtype.type, 
np.integer): + # upcast to prevent overflow + arr = np.asarray(fill_value) + if arr != arr.astype(dtype): + dtype = arr.dtype + elif is_complex(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.object_ + elif issubclass(dtype.type, (np.integer, np.floating)): + dtype = np.complex128 + elif fill_value is None: + if is_float_dtype(dtype) or is_complex_dtype(dtype): + fill_value = np.nan + elif is_integer_dtype(dtype): + dtype = np.float64 + fill_value = np.nan + elif is_datetime_or_timedelta_dtype(dtype): + fill_value = iNaT + else: + dtype = np.object_ + else: + dtype = np.object_ + + # in case we have a string that looked like a number + if is_categorical_dtype(dtype): + pass + elif is_datetimetz(dtype): + pass + elif issubclass(np.dtype(dtype).type, string_types): + dtype = np.object_ + + return dtype, fill_value + + +def _infer_dtype_from_scalar(val): + """ interpret the dtype from a scalar """ + + dtype = np.object_ + + # a 1-element ndarray + if isinstance(val, np.ndarray): + if val.ndim != 0: + raise ValueError( + "invalid ndarray passed to _infer_dtype_from_scalar") + + dtype = val.dtype + val = val.item() + + elif isinstance(val, string_types): + + # If we create an empty array using a string to infer + # the dtype, NumPy will only allocate one character per entry + # so this is kind of bad. Alternately we could use np.repeat + # instead of np.empty (but then you still don't want things + # coming out as np.str_! + + dtype = np.object_ + + elif isinstance(val, (np.datetime64, + datetime)) and getattr(val, 'tzinfo', None) is None: + val = lib.Timestamp(val).value + dtype = np.dtype('M8[ns]') + + elif isinstance(val, (np.timedelta64, timedelta)): + val = lib.Timedelta(val).value + dtype = np.dtype('m8[ns]') + + elif is_bool(val): + dtype = np.bool_ + + elif is_integer(val): + if isinstance(val, np.integer): + dtype = type(val) + else: + dtype = np.int64 + + elif is_float(val): + if isinstance(val, np.floating): + dtype = type(val) + else: + dtype = np.float64 + + elif is_complex(val): + dtype = np.complex_ + + return dtype, val + + +def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): + """ provide explict type promotion and coercion + + Parameters + ---------- + values : the ndarray that we want to maybe upcast + fill_value : what we want to fill with + dtype : if None, then use the dtype of the values, else coerce to this type + copy : if True always make a copy even if no upcast is required + """ + + if is_extension_type(values): + if copy: + values = values.copy() + else: + if dtype is None: + dtype = values.dtype + new_dtype, fill_value = _maybe_promote(dtype, fill_value) + if new_dtype != values.dtype: + values = values.astype(new_dtype) + elif copy: + values = values.copy() + + return values, fill_value + + +def _possibly_cast_item(obj, item, dtype): + chunk = obj[item] + + if chunk.values.dtype != dtype: + if dtype in (np.object_, np.bool_): + obj[item] = chunk.astype(np.object_) + elif not issubclass(dtype, (np.integer, np.bool_)): # pragma: no cover + raise ValueError("Unexpected dtype encountered: %s" % dtype) + + +def _invalidate_string_dtypes(dtype_set): + """Change string like dtypes to object for + ``DataFrame.select_dtypes()``. + """ + non_string_dtypes = dtype_set - _string_dtypes + if non_string_dtypes != dtype_set: + raise TypeError("string dtypes are not allowed, use 'object' instead") + + +def _maybe_convert_string_to_object(values): + """ + + Convert string-like and string-like array to convert object dtype. 
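
# A condensed, standalone restatement of the scalar-inference rules from
# _infer_dtype_from_scalar above (illustrative names, not the pandas helper;
# strings deliberately map to object so NumPy does not allocate fixed-width
# character arrays):
import numpy as np
from datetime import datetime

def infer_dtype_from_scalar_sketch(val):
    if isinstance(val, str):
        return np.object_
    if isinstance(val, bool):   # checked before int: bool is an int subclass
        return np.bool_
    if isinstance(val, int):
        return np.int64
    if isinstance(val, float):
        return np.float64
    if isinstance(val, (np.datetime64, datetime)):
        return np.dtype('M8[ns]')
    return np.object_

print(infer_dtype_from_scalar_sketch(3))     # <class 'numpy.int64'>
print(infer_dtype_from_scalar_sketch('ab'))  # <class 'numpy.object_'>
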
+ This is to avoid numpy to handle the array as str dtype. + """ + if isinstance(values, string_types): + values = np.array([values], dtype=object) + elif (isinstance(values, np.ndarray) and + issubclass(values.dtype.type, (np.string_, np.unicode_))): + values = values.astype(object) + return values + + +def _maybe_convert_scalar(values): + """ + Convert a python scalar to the appropriate numpy dtype if possible + This avoids numpy directly converting according to platform preferences + """ + if is_scalar(values): + dtype, values = _infer_dtype_from_scalar(values) + try: + values = dtype(values) + except TypeError: + pass + return values + + +def _coerce_indexer_dtype(indexer, categories): + """ coerce the indexer input array to the smallest dtype possible """ + l = len(categories) + if l < _int8_max: + return _ensure_int8(indexer) + elif l < _int16_max: + return _ensure_int16(indexer) + elif l < _int32_max: + return _ensure_int32(indexer) + return _ensure_int64(indexer) + + +def _coerce_to_dtypes(result, dtypes): + """ + given a dtypes and a result set, coerce the result elements to the + dtypes + """ + if len(result) != len(dtypes): + raise AssertionError("_coerce_to_dtypes requires equal len arrays") + + from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type + + def conv(r, dtype): + try: + if isnull(r): + pass + elif dtype == _NS_DTYPE: + r = lib.Timestamp(r) + elif dtype == _TD_DTYPE: + r = _coerce_scalar_to_timedelta_type(r) + elif dtype == np.bool_: + # messy. non 0/1 integers do not get converted. + if is_integer(r) and r not in [0, 1]: + return int(r) + r = bool(r) + elif dtype.kind == 'f': + r = float(r) + elif dtype.kind == 'i': + r = int(r) + except: + pass + + return r + + return [conv(r, dtype) for r, dtype in zip(result, dtypes)] + + +def _astype_nansafe(arr, dtype, copy=True): + """ return a view if copy is False, but + need to be very careful as the result shape could change! 
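
# Sketch of the smallest-dtype selection performed by _coerce_indexer_dtype
# above (standalone; the _ensure_int* helpers are pandas internals): the
# codes array only needs as many bits as there are categories.
import numpy as np

def smallest_code_dtype(n_categories):
    for dt in (np.int8, np.int16, np.int32):
        if n_categories < np.iinfo(dt).max:
            return np.dtype(dt)
    return np.dtype(np.int64)

print(smallest_code_dtype(100))    # int8
print(smallest_code_dtype(1000))   # int16
print(smallest_code_dtype(10**6))  # int32
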
""" + if not isinstance(dtype, np.dtype): + dtype = _coerce_to_dtype(dtype) + + if issubclass(dtype.type, text_type): + # in Py3 that's str, in Py2 that's unicode + return lib.astype_unicode(arr.ravel()).reshape(arr.shape) + elif issubclass(dtype.type, string_types): + return lib.astype_str(arr.ravel()).reshape(arr.shape) + elif is_datetime64_dtype(arr): + if dtype == object: + return tslib.ints_to_pydatetime(arr.view(np.int64)) + elif dtype == np.int64: + return arr.view(dtype) + elif dtype != _NS_DTYPE: + raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % + (arr.dtype, dtype)) + return arr.astype(_NS_DTYPE) + elif is_timedelta64_dtype(arr): + if dtype == np.int64: + return arr.view(dtype) + elif dtype == object: + return tslib.ints_to_pytimedelta(arr.view(np.int64)) + + # in py3, timedelta64[ns] are int64 + elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or + (not PY3 and dtype != _TD_DTYPE)): + + # allow frequency conversions + if dtype.kind == 'm': + mask = isnull(arr) + result = arr.astype(dtype).astype(np.float64) + result[mask] = np.nan + return result + + raise TypeError("cannot astype a timedelta from [%s] to [%s]" % + (arr.dtype, dtype)) + + return arr.astype(_TD_DTYPE) + elif (np.issubdtype(arr.dtype, np.floating) and + np.issubdtype(dtype, np.integer)): + + if np.isnan(arr).any(): + raise ValueError('Cannot convert NA to integer') + elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): + # work around NumPy brokenness, #1987 + return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + + if copy: + return arr.astype(dtype) + return arr.view(dtype) + + +def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, + convert_timedeltas=True, copy=True): + """ if we have an object dtype, try to coerce dates and/or numbers """ + + # if we have passed in a list or scalar + if isinstance(values, (list, tuple)): + values = np.array(values, dtype=np.object_) + if not hasattr(values, 'dtype'): + values = np.array([values], dtype=np.object_) + + # convert dates + if convert_dates and values.dtype == np.object_: + + # we take an aggressive stance and convert to datetime64[ns] + if convert_dates == 'coerce': + new_values = _possibly_cast_to_datetime(values, 'M8[ns]', + errors='coerce') + + # if we are all nans then leave me alone + if not isnull(new_values).all(): + values = new_values + + else: + values = lib.maybe_convert_objects(values, + convert_datetime=convert_dates) + + # convert timedeltas + if convert_timedeltas and values.dtype == np.object_: + + if convert_timedeltas == 'coerce': + from pandas.tseries.timedeltas import to_timedelta + new_values = to_timedelta(values, coerce=True) + + # if we are all nans then leave me alone + if not isnull(new_values).all(): + values = new_values + + else: + values = lib.maybe_convert_objects( + values, convert_timedelta=convert_timedeltas) + + # convert to numeric + if values.dtype == np.object_: + if convert_numeric: + try: + new_values = lib.maybe_convert_numeric(values, set(), + coerce_numeric=True) + + # if we are all nans then leave me alone + if not isnull(new_values).all(): + values = new_values + + except: + pass + else: + # soft-conversion + values = lib.maybe_convert_objects(values) + + values = values.copy() if copy else values + + return values + + +def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, + coerce=False, copy=True): + """ if we have an object dtype, try to coerce dates and/or numbers """ + + conversion_count = sum((datetime, 
numeric, timedelta)) + if conversion_count == 0: + raise ValueError('At least one of datetime, numeric or timedelta must ' + 'be True.') + elif conversion_count > 1 and coerce: + raise ValueError("Only one of 'datetime', 'numeric' or " + "'timedelta' can be True when when coerce=True.") + + if isinstance(values, (list, tuple)): + # List or scalar + values = np.array(values, dtype=np.object_) + elif not hasattr(values, 'dtype'): + values = np.array([values], dtype=np.object_) + elif not is_object_dtype(values.dtype): + # If not object, do not attempt conversion + values = values.copy() if copy else values + return values + + # If 1 flag is coerce, ensure 2 others are False + if coerce: + # Immediate return if coerce + if datetime: + from pandas import to_datetime + return to_datetime(values, errors='coerce', box=False) + elif timedelta: + from pandas import to_timedelta + return to_timedelta(values, errors='coerce', box=False) + elif numeric: + from pandas import to_numeric + return to_numeric(values, errors='coerce') + + # Soft conversions + if datetime: + values = lib.maybe_convert_objects(values, convert_datetime=datetime) + + if timedelta and is_object_dtype(values.dtype): + # Object check to ensure only run if previous did not convert + values = lib.maybe_convert_objects(values, convert_timedelta=timedelta) + + if numeric and is_object_dtype(values.dtype): + try: + converted = lib.maybe_convert_numeric(values, set(), + coerce_numeric=True) + # If all NaNs, then do not-alter + values = converted if not isnull(converted).all() else values + values = values.copy() if copy else values + except: + pass + + return values + + +def _possibly_castable(arr): + # return False to force a non-fastpath + + # check datetime64[ns]/timedelta64[ns] are valid + # otherwise try to coerce + kind = arr.dtype.kind + if kind == 'M' or kind == 'm': + return arr.dtype in _DATELIKE_DTYPES + + return arr.dtype.name not in _POSSIBLY_CAST_DTYPES + + +def _possibly_infer_to_datetimelike(value, convert_dates=False): + """ + we might have a array (or single object) that is datetime like, + and no dtype is passed don't change the value unless we find a + datetime/timedelta set + + this is pretty strict in that a datetime/timedelta is REQUIRED + in addition to possible nulls/string likes + + ONLY strings are NOT datetimelike + + Parameters + ---------- + value : np.array / Series / Index / list-like + convert_dates : boolean, default False + if True try really hard to convert dates (such as datetime.date), other + leave inferred dtype 'date' alone + + """ + + if isinstance(value, (ABCDatetimeIndex, ABCPeriodIndex)): + return value + elif isinstance(value, ABCSeries): + if isinstance(value._values, ABCDatetimeIndex): + return value._values + + v = value + + if not is_list_like(v): + v = [v] + v = np.array(v, copy=False) + shape = v.shape + if not v.ndim == 1: + v = v.ravel() + + if len(v): + + def _try_datetime(v): + # safe coerce to datetime64 + try: + v = tslib.array_to_datetime(v, errors='raise') + except ValueError: + + # we might have a sequence of the same-datetimes with tz's + # if so coerce to a DatetimeIndex; if they are not the same, + # then these stay as object dtype + try: + from pandas import to_datetime + return to_datetime(v) + except: + pass + + except: + pass + + return v.reshape(shape) + + def _try_timedelta(v): + # safe coerce to timedelta64 + + # will try first with a string & object conversion + from pandas import to_timedelta + try: + return to_timedelta(v)._values.reshape(shape) + except: + return 
v + + # do a quick inference for perf + sample = v[:min(3, len(v))] + inferred_type = lib.infer_dtype(sample) + + if (inferred_type in ['datetime', 'datetime64'] or + (convert_dates and inferred_type in ['date'])): + value = _try_datetime(v) + elif inferred_type in ['timedelta', 'timedelta64']: + value = _try_timedelta(v) + + # It's possible to have nulls intermixed within the datetime or + # timedelta. These will in general have an inferred_type of 'mixed', + # so have to try both datetime and timedelta. + + # try timedelta first to avoid spurious datetime conversions + # e.g. '00:00:01' is a timedelta but technically is also a datetime + elif inferred_type in ['mixed']: + + if lib.is_possible_datetimelike_array(_ensure_object(v)): + value = _try_timedelta(v) + if lib.infer_dtype(value) in ['mixed']: + value = _try_datetime(v) + + return value + + +def _possibly_cast_to_datetime(value, dtype, errors='raise'): + """ try to cast the array/value to a datetimelike dtype, converting float + nan to iNaT + """ + from pandas.tseries.timedeltas import to_timedelta + from pandas.tseries.tools import to_datetime + + if dtype is not None: + if isinstance(dtype, string_types): + dtype = np.dtype(dtype) + + is_datetime64 = is_datetime64_dtype(dtype) + is_datetime64tz = is_datetime64tz_dtype(dtype) + is_timedelta64 = is_timedelta64_dtype(dtype) + + if is_datetime64 or is_datetime64tz or is_timedelta64: + + # force the dtype if needed + if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): + if dtype.name == 'datetime64[ns]': + dtype = _NS_DTYPE + else: + raise TypeError("cannot convert datetimelike to " + "dtype [%s]" % dtype) + elif is_datetime64tz: + + # our NaT doesn't support tz's + # this will coerce to DatetimeIndex with + # a matching dtype below + if is_scalar(value) and isnull(value): + value = [value] + + elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): + if dtype.name == 'timedelta64[ns]': + dtype = _TD_DTYPE + else: + raise TypeError("cannot convert timedeltalike to " + "dtype [%s]" % dtype) + + if is_scalar(value): + if value == tslib.iNaT or isnull(value): + value = tslib.iNaT + else: + value = np.array(value, copy=False) + + # have a scalar array-like (e.g. 
NaT) + if value.ndim == 0: + value = tslib.iNaT + + # we have an array of datetime or timedeltas & nulls + elif np.prod(value.shape) or not is_dtype_equal(value.dtype, + dtype): + try: + if is_datetime64: + value = to_datetime(value, errors=errors)._values + elif is_datetime64tz: + # input has to be UTC at this point, so just + # localize + value = to_datetime( + value, + errors=errors).tz_localize(dtype.tz) + elif is_timedelta64: + value = to_timedelta(value, errors=errors)._values + except (AttributeError, ValueError, TypeError): + pass + + # coerce datetimelike to object + elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype): + if is_object_dtype(dtype): + ints = np.asarray(value).view('i8') + return tslib.ints_to_pydatetime(ints) + + # we have a non-castable dtype that was passed + raise TypeError('Cannot cast datetime64 to %s' % dtype) + + else: + + is_array = isinstance(value, np.ndarray) + + # catch a datetime/timedelta that is not of ns variety + # and no coercion specified + if is_array and value.dtype.kind in ['M', 'm']: + dtype = value.dtype + + if dtype.kind == 'M' and dtype != _NS_DTYPE: + value = value.astype(_NS_DTYPE) + + elif dtype.kind == 'm' and dtype != _TD_DTYPE: + value = to_timedelta(value) + + # only do this if we have an array and the dtype of the array is not + # setup already we are not an integer/object, so don't bother with this + # conversion + elif not (is_array and not (issubclass(value.dtype.type, np.integer) or + value.dtype == np.object_)): + value = _possibly_infer_to_datetimelike(value) + + return value diff --git a/pandas/types/common.py b/pandas/types/common.py new file mode 100644 index 0000000000000..9d0ccaac843ef --- /dev/null +++ b/pandas/types/common.py @@ -0,0 +1,448 @@ +""" common type operations """ + +import numpy as np +from pandas.compat import string_types, text_type, binary_type +from pandas import lib, algos +from .dtypes import (CategoricalDtype, CategoricalDtypeType, + DatetimeTZDtype, DatetimeTZDtypeType, + ExtensionDtype) +from .generic import (ABCCategorical, ABCPeriodIndex, + ABCDatetimeIndex, ABCSeries, + ABCSparseArray, ABCSparseSeries) +from .inference import is_integer, is_string_like +from .inference import * # noqa + + +_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name + for t in ['O', 'int8', 'uint8', 'int16', 'uint16', + 'int32', 'uint32', 'int64', 'uint64']]) + +_NS_DTYPE = np.dtype('M8[ns]') +_TD_DTYPE = np.dtype('m8[ns]') +_INT64_DTYPE = np.dtype(np.int64) +_DATELIKE_DTYPES = set([np.dtype(t) + for t in ['M8[ns]', 'M8[ns]', + 'm8[ns]', 'm8[ns]']]) + +_ensure_float64 = algos.ensure_float64 +_ensure_float32 = algos.ensure_float32 + + +def _ensure_float(arr): + if issubclass(arr.dtype.type, (np.integer, np.bool_)): + arr = arr.astype(float) + return arr + +_ensure_int64 = algos.ensure_int64 +_ensure_int32 = algos.ensure_int32 +_ensure_int16 = algos.ensure_int16 +_ensure_int8 = algos.ensure_int8 +_ensure_platform_int = algos.ensure_platform_int +_ensure_object = algos.ensure_object + + +def is_object_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.object_) + + +def is_sparse(array): + """ return if we are a sparse array """ + return isinstance(array, (ABCSparseArray, ABCSparseSeries)) + + +def is_categorical(array): + """ return if we are a categorical possibility """ + return isinstance(array, ABCCategorical) or is_categorical_dtype(array) + + +def is_datetimetz(array): + """ return if we are a datetime with tz array """ + return ((isinstance(array, ABCDatetimeIndex) and + 
getattr(array, 'tz', None) is not None) or + is_datetime64tz_dtype(array)) + + +def is_datetime64_dtype(arr_or_dtype): + try: + tipo = _get_dtype_type(arr_or_dtype) + except TypeError: + return False + return issubclass(tipo, np.datetime64) + + +def is_datetime64tz_dtype(arr_or_dtype): + return DatetimeTZDtype.is_dtype(arr_or_dtype) + + +def is_timedelta64_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.timedelta64) + + +def is_categorical_dtype(arr_or_dtype): + return CategoricalDtype.is_dtype(arr_or_dtype) + + +def is_string_dtype(arr_or_dtype): + dtype = _get_dtype(arr_or_dtype) + return dtype.kind in ('O', 'S', 'U') + + +def is_period_arraylike(arr): + """ return if we are period arraylike / PeriodIndex """ + if isinstance(arr, ABCPeriodIndex): + return True + elif isinstance(arr, (np.ndarray, ABCSeries)): + return arr.dtype == object and lib.infer_dtype(arr) == 'period' + return getattr(arr, 'inferred_type', None) == 'period' + + +def is_datetime_arraylike(arr): + """ return if we are datetime arraylike / DatetimeIndex """ + if isinstance(arr, ABCDatetimeIndex): + return True + elif isinstance(arr, (np.ndarray, ABCSeries)): + return arr.dtype == object and lib.infer_dtype(arr) == 'datetime' + return getattr(arr, 'inferred_type', None) == 'datetime' + + +def is_datetimelike(arr): + return (arr.dtype in _DATELIKE_DTYPES or + isinstance(arr, ABCPeriodIndex) or + is_datetimetz(arr)) + + +def is_dtype_equal(source, target): + """ return a boolean if the dtypes are equal """ + try: + source = _get_dtype(source) + target = _get_dtype(target) + return source == target + except (TypeError, AttributeError): + + # invalid comparison + # object == category will hit this + return False + + +def is_any_int_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.integer) + + +def is_integer_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return (issubclass(tipo, np.integer) and + not issubclass(tipo, (np.datetime64, np.timedelta64))) + + +def is_int64_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.int64) + + +def is_int_or_datetime_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return (issubclass(tipo, np.integer) or + issubclass(tipo, (np.datetime64, np.timedelta64))) + + +def is_datetime64_any_dtype(arr_or_dtype): + return (is_datetime64_dtype(arr_or_dtype) or + is_datetime64tz_dtype(arr_or_dtype)) + + +def is_datetime64_ns_dtype(arr_or_dtype): + try: + tipo = _get_dtype(arr_or_dtype) + except TypeError: + return False + return tipo == _NS_DTYPE + + +def is_timedelta64_ns_dtype(arr_or_dtype): + tipo = _get_dtype(arr_or_dtype) + return tipo == _TD_DTYPE + + +def is_datetime_or_timedelta_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, (np.datetime64, np.timedelta64)) + + +def is_numeric_v_string_like(a, b): + """ + numpy doesn't like to compare numeric arrays vs scalar string-likes + + return a boolean result if this is the case for a,b or b,a + + """ + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and is_numeric_dtype(a) + is_b_numeric_array = is_b_array and is_numeric_dtype(b) + is_a_string_array = is_a_array and is_string_like_dtype(a) + is_b_string_array = is_b_array and is_string_like_dtype(b) + + is_a_scalar_string_like = not is_a_array and is_string_like(a) + is_b_scalar_string_like = not is_b_array and is_string_like(b) + + return 
((is_a_numeric_array and is_b_scalar_string_like) or + (is_b_numeric_array and is_a_scalar_string_like) or + (is_a_numeric_array and is_b_string_array) or + (is_b_numeric_array and is_a_string_array)) + + +def is_datetimelike_v_numeric(a, b): + # return if we have an i8 convertible and numeric comparison + if not hasattr(a, 'dtype'): + a = np.asarray(a) + if not hasattr(b, 'dtype'): + b = np.asarray(b) + + def is_numeric(x): + return is_integer_dtype(x) or is_float_dtype(x) + + is_datetimelike = needs_i8_conversion + return ((is_datetimelike(a) and is_numeric(b)) or + (is_datetimelike(b) and is_numeric(a))) + + +def is_datetimelike_v_object(a, b): + # return if we have an i8 convertible and object comparsion + if not hasattr(a, 'dtype'): + a = np.asarray(a) + if not hasattr(b, 'dtype'): + b = np.asarray(b) + + def f(x): + return is_object_dtype(x) + + def is_object(x): + return is_integer_dtype(x) or is_float_dtype(x) + + is_datetimelike = needs_i8_conversion + return ((is_datetimelike(a) and is_object(b)) or + (is_datetimelike(b) and is_object(a))) + + +def needs_i8_conversion(arr_or_dtype): + return (is_datetime_or_timedelta_dtype(arr_or_dtype) or + is_datetime64tz_dtype(arr_or_dtype)) + + +def is_numeric_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return (issubclass(tipo, (np.number, np.bool_)) and + not issubclass(tipo, (np.datetime64, np.timedelta64))) + + +def is_string_like_dtype(arr_or_dtype): + # exclude object as its a mixed dtype + dtype = _get_dtype(arr_or_dtype) + return dtype.kind in ('S', 'U') + + +def is_float_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.floating) + + +def is_floating_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return isinstance(tipo, np.floating) + + +def is_bool_dtype(arr_or_dtype): + try: + tipo = _get_dtype_type(arr_or_dtype) + except ValueError: + # this isn't even a dtype + return False + return issubclass(tipo, np.bool_) + + +def is_extension_type(value): + """ + if we are a klass that is preserved by the internals + these are internal klasses that we represent (and don't use a np.array) + """ + if is_categorical(value): + return True + elif is_sparse(value): + return True + elif is_datetimetz(value): + return True + return False + + +def is_complex_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.complexfloating) + + +def _coerce_to_dtype(dtype): + """ coerce a string / np.dtype to a dtype """ + if is_categorical_dtype(dtype): + dtype = CategoricalDtype() + elif is_datetime64tz_dtype(dtype): + dtype = DatetimeTZDtype(dtype) + else: + dtype = np.dtype(dtype) + return dtype + + +def _get_dtype(arr_or_dtype): + if isinstance(arr_or_dtype, np.dtype): + return arr_or_dtype + elif isinstance(arr_or_dtype, type): + return np.dtype(arr_or_dtype) + elif isinstance(arr_or_dtype, CategoricalDtype): + return arr_or_dtype + elif isinstance(arr_or_dtype, DatetimeTZDtype): + return arr_or_dtype + elif isinstance(arr_or_dtype, string_types): + if is_categorical_dtype(arr_or_dtype): + return CategoricalDtype.construct_from_string(arr_or_dtype) + elif is_datetime64tz_dtype(arr_or_dtype): + return DatetimeTZDtype.construct_from_string(arr_or_dtype) + + if hasattr(arr_or_dtype, 'dtype'): + arr_or_dtype = arr_or_dtype.dtype + return np.dtype(arr_or_dtype) + + +def _get_dtype_type(arr_or_dtype): + if isinstance(arr_or_dtype, np.dtype): + return arr_or_dtype.type + elif isinstance(arr_or_dtype, type): + return np.dtype(arr_or_dtype).type + elif 
isinstance(arr_or_dtype, CategoricalDtype): + return CategoricalDtypeType + elif isinstance(arr_or_dtype, DatetimeTZDtype): + return DatetimeTZDtypeType + elif isinstance(arr_or_dtype, string_types): + if is_categorical_dtype(arr_or_dtype): + return CategoricalDtypeType + elif is_datetime64tz_dtype(arr_or_dtype): + return DatetimeTZDtypeType + return _get_dtype_type(np.dtype(arr_or_dtype)) + try: + return arr_or_dtype.dtype.type + except AttributeError: + return type(None) + + +def _get_dtype_from_object(dtype): + """Get a numpy dtype.type-style object. This handles the datetime64[ns] + and datetime64[ns, TZ] compat + + Notes + ----- + If nothing can be found, returns ``object``. + """ + + # type object from a dtype + if isinstance(dtype, type) and issubclass(dtype, np.generic): + return dtype + elif is_categorical(dtype): + return CategoricalDtype().type + elif is_datetimetz(dtype): + return DatetimeTZDtype(dtype).type + elif isinstance(dtype, np.dtype): # dtype object + try: + _validate_date_like_dtype(dtype) + except TypeError: + # should still pass if we don't have a datelike + pass + return dtype.type + elif isinstance(dtype, string_types): + if dtype == 'datetime' or dtype == 'timedelta': + dtype += '64' + + try: + return _get_dtype_from_object(getattr(np, dtype)) + except (AttributeError, TypeError): + # handles cases like _get_dtype(int) + # i.e., python objects that are valid dtypes (unlike user-defined + # types, in general) + # TypeError handles the float16 typecode of 'e' + # further handle internal types + pass + + return _get_dtype_from_object(np.dtype(dtype)) + + +def _validate_date_like_dtype(dtype): + try: + typ = np.datetime_data(dtype)[0] + except ValueError as e: + raise TypeError('%s' % e) + if typ != 'generic' and typ != 'ns': + raise ValueError('%r is too specific of a frequency, try passing %r' % + (dtype.name, dtype.type.__name__)) + + +def _lcd_dtypes(a_dtype, b_dtype): + """ return the lcd dtype to hold these types """ + + if is_datetime64_dtype(a_dtype) or is_datetime64_dtype(b_dtype): + return _NS_DTYPE + elif is_timedelta64_dtype(a_dtype) or is_timedelta64_dtype(b_dtype): + return _TD_DTYPE + elif is_complex_dtype(a_dtype): + if is_complex_dtype(b_dtype): + return a_dtype + return np.float64 + elif is_integer_dtype(a_dtype): + if is_integer_dtype(b_dtype): + if a_dtype.itemsize == b_dtype.itemsize: + return a_dtype + return np.int64 + return np.float64 + elif is_float_dtype(a_dtype): + if is_float_dtype(b_dtype): + if a_dtype.itemsize == b_dtype.itemsize: + return a_dtype + else: + return np.float64 + elif is_integer(b_dtype): + return np.float64 + return np.object + +_string_dtypes = frozenset(map(_get_dtype_from_object, (binary_type, + text_type))) + + +def pandas_dtype(dtype): + """ + Converts input into a pandas only dtype object or a numpy dtype object. 
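
# For intuition about _lcd_dtypes above, NumPy's own promotion rules reach
# similar answers for the numeric cases (a comparison point only, not the
# helper's exact behavior; the helper special-cases datetimes and itemsize
# ties):
import numpy as np

print(np.promote_types('int32', 'int64'))        # int64
print(np.promote_types('int64', 'float32'))      # float64
print(np.promote_types('complex64', 'float64'))  # complex128
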
+ + Parameters + ---------- + dtype : object to be converted + + Returns + ------- + np.dtype or a pandas dtype + """ + if isinstance(dtype, DatetimeTZDtype): + return dtype + elif isinstance(dtype, CategoricalDtype): + return dtype + elif isinstance(dtype, string_types): + try: + return DatetimeTZDtype.construct_from_string(dtype) + except TypeError: + pass + + try: + return CategoricalDtype.construct_from_string(dtype) + except TypeError: + pass + elif isinstance(dtype, ExtensionDtype): + return dtype + + return np.dtype(dtype) diff --git a/pandas/types/concat.py b/pandas/types/concat.py index 44338f26eb2e8..3b30531fb30ac 100644 --- a/pandas/types/concat.py +++ b/pandas/types/concat.py @@ -3,10 +3,19 @@ """ import numpy as np -import pandas.core.common as com import pandas.tslib as tslib from pandas import compat from pandas.compat import map +from .common import (is_categorical_dtype, + is_sparse, + is_datetimetz, + is_datetime64_dtype, + is_timedelta64_dtype, + is_object_dtype, + is_bool_dtype, + is_dtype_equal, + _NS_DTYPE, + _TD_DTYPE) def get_dtype_kinds(l): @@ -24,19 +33,19 @@ def get_dtype_kinds(l): for arr in l: dtype = arr.dtype - if com.is_categorical_dtype(dtype): + if is_categorical_dtype(dtype): typ = 'category' - elif com.is_sparse(arr): + elif is_sparse(arr): typ = 'sparse' - elif com.is_datetimetz(arr): + elif is_datetimetz(arr): typ = 'datetimetz' - elif com.is_datetime64_dtype(dtype): + elif is_datetime64_dtype(dtype): typ = 'datetime' - elif com.is_timedelta64_dtype(dtype): + elif is_timedelta64_dtype(dtype): typ = 'timedelta' - elif com.is_object_dtype(dtype): + elif is_object_dtype(dtype): typ = 'object' - elif com.is_bool_dtype(dtype): + elif is_bool_dtype(dtype): typ = 'bool' else: typ = dtype.kind @@ -51,14 +60,14 @@ def _get_series_result_type(result): """ if isinstance(result, dict): # concat Series with axis 1 - if all(com.is_sparse(c) for c in compat.itervalues(result)): + if all(is_sparse(c) for c in compat.itervalues(result)): from pandas.sparse.api import SparseDataFrame return SparseDataFrame else: from pandas.core.frame import DataFrame return DataFrame - elif com.is_sparse(result): + elif is_sparse(result): # concat Series with axis 1 from pandas.sparse.api import SparseSeries return SparseSeries @@ -165,7 +174,7 @@ def _concat_categorical(to_concat, axis=0): def convert_categorical(x): # coerce to object dtype - if com.is_categorical_dtype(x.dtype): + if is_categorical_dtype(x.dtype): return x.get_values() return x.ravel() @@ -177,7 +186,7 @@ def convert_categorical(x): # we could have object blocks and categoricals here # if we only have a single categoricals then combine everything # else its a non-compat categorical - categoricals = [x for x in to_concat if com.is_categorical_dtype(x.dtype)] + categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)] # validate the categories categories = categoricals[0] @@ -235,7 +244,7 @@ def union_categoricals(to_union): if any(c.ordered for c in to_union): raise TypeError("Can only combine unordered Categoricals") - if not all(com.is_dtype_equal(c.categories.dtype, first.categories.dtype) + if not all(is_dtype_equal(c.categories.dtype, first.categories.dtype) for c in to_union): raise TypeError("dtype of categories must be the same") @@ -272,7 +281,7 @@ def convert_to_pydatetime(x, axis): # coerce to an object dtype # if dtype is of datetimetz or timezone - if x.dtype.kind == com._NS_DTYPE.kind: + if x.dtype.kind == _NS_DTYPE.kind: if getattr(x, 'tz', None) is not None: x = x.asobject.values else: @@ 
-280,7 +289,7 @@ def convert_to_pydatetime(x, axis): x = tslib.ints_to_pydatetime(x.view(np.int64).ravel()) x = x.reshape(shape) - elif x.dtype == com._TD_DTYPE: + elif x.dtype == _TD_DTYPE: shape = x.shape x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel()) x = x.reshape(shape) @@ -310,12 +319,12 @@ def convert_to_pydatetime(x, axis): elif 'datetime' in typs: new_values = np.concatenate([x.view(np.int64) for x in to_concat], axis=axis) - return new_values.view(com._NS_DTYPE) + return new_values.view(_NS_DTYPE) elif 'timedelta' in typs: new_values = np.concatenate([x.view(np.int64) for x in to_concat], axis=axis) - return new_values.view(com._TD_DTYPE) + return new_values.view(_TD_DTYPE) # need to coerce to object to_concat = [convert_to_pydatetime(x, axis) for x in to_concat] @@ -350,7 +359,7 @@ def convert_sparse(x, axis): return x if typs is None: - typs = com.get_dtype_kinds(to_concat) + typs = get_dtype_kinds(to_concat) if len(typs) == 1: # concat input as it is if all inputs are sparse @@ -374,7 +383,7 @@ def convert_sparse(x, axis): # input may be sparse / dense mixed and may have different fill_value # input must contain sparse at least 1 - sparses = [c for c in to_concat if com.is_sparse(c)] + sparses = [c for c in to_concat if is_sparse(c)] fill_values = [c.fill_value for c in sparses] sp_indexes = [c.sp_index for c in sparses] diff --git a/pandas/types/inference.py b/pandas/types/inference.py new file mode 100644 index 0000000000000..35a2dc2fb831b --- /dev/null +++ b/pandas/types/inference.py @@ -0,0 +1,104 @@ +""" basic inference routines """ + +import collections +import re +import numpy as np +from numbers import Number +from pandas.compat import (string_types, text_type, + string_and_binary_types) +from pandas import lib + +is_bool = lib.is_bool + +is_integer = lib.is_integer + +is_float = lib.is_float + +is_complex = lib.is_complex + +is_scalar = lib.isscalar + + +def is_number(obj): + return isinstance(obj, (Number, np.number)) + + +def is_string_like(obj): + return isinstance(obj, (text_type, string_types)) + + +def _iterable_not_string(x): + return (isinstance(x, collections.Iterable) and + not isinstance(x, string_types)) + + +def is_iterator(obj): + # python 3 generators have __next__ instead of next + return hasattr(obj, 'next') or hasattr(obj, '__next__') + + +def is_re(obj): + return isinstance(obj, re._pattern_type) + + +def is_re_compilable(obj): + try: + re.compile(obj) + except TypeError: + return False + else: + return True + + +def is_list_like(arg): + return (hasattr(arg, '__iter__') and + not isinstance(arg, string_and_binary_types)) + + +def is_dict_like(arg): + return hasattr(arg, '__getitem__') and hasattr(arg, 'keys') + + +def is_named_tuple(arg): + return isinstance(arg, tuple) and hasattr(arg, '_fields') + + +def is_hashable(arg): + """Return True if hash(arg) will succeed, False otherwise. + + Some types will pass a test against collections.Hashable but fail when they + are actually hashed with hash(). + + Distinguish between these and other types by trying the call to hash() and + seeing if they raise TypeError. 
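
# Standalone restatements (not imports) of two of the duck-typing rules
# defined above, usable as a quick sanity check:
def is_iterator_sketch(obj):
    # py2 iterators expose next(), py3 iterators expose __next__()
    return hasattr(obj, 'next') or hasattr(obj, '__next__')

def is_list_like_sketch(arg):
    # iterable, but strings/bytes are deliberately excluded
    return hasattr(arg, '__iter__') and not isinstance(arg, (str, bytes))

print(is_iterator_sketch(iter([1, 2])), is_iterator_sketch([1, 2]))  # True False
print(is_list_like_sketch([1, 2]), is_list_like_sketch('ab'))        # True False
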
+ + Examples + -------- + >>> a = ([],) + >>> isinstance(a, collections.Hashable) + True + >>> is_hashable(a) + False + """ + # unfortunately, we can't use isinstance(arg, collections.Hashable), which + # can be faster than calling hash, because numpy scalars on Python 3 fail + # this test + + # reconsider this decision once this numpy bug is fixed: + # https://github.com/numpy/numpy/issues/5562 + + try: + hash(arg) + except TypeError: + return False + else: + return True + + +def is_sequence(x): + try: + iter(x) + len(x) # it has a length + return not isinstance(x, string_and_binary_types) + except (TypeError, AttributeError): + return False diff --git a/pandas/types/missing.py b/pandas/types/missing.py new file mode 100644 index 0000000000000..8b4193d02beb7 --- /dev/null +++ b/pandas/types/missing.py @@ -0,0 +1,394 @@ +""" +missing types & inference +""" +import numpy as np +from pandas import lib +from pandas.tslib import NaT, iNaT +from .generic import (ABCMultiIndex, ABCSeries, + ABCIndexClass, ABCGeneric) +from .common import (is_string_dtype, is_datetimelike, + is_datetimelike_v_numeric, is_float_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, + is_timedelta64_dtype, + is_complex_dtype, is_categorical_dtype, + is_string_like_dtype, is_bool_dtype, + is_integer_dtype, is_dtype_equal, + needs_i8_conversion, _ensure_object, + pandas_dtype, + is_scalar, + is_object_dtype, + is_integer, + _TD_DTYPE, + _NS_DTYPE, + _DATELIKE_DTYPES) +from .inference import is_list_like + + +def isnull(obj): + """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) + + Parameters + ---------- + arr : ndarray or object value + Object to check for null-ness + + Returns + ------- + isnulled : array-like of bool or bool + Array or bool indicating whether an object is null or if an array is + given which of the element is null. + + See also + -------- + pandas.notnull: boolean inverse of pandas.isnull + """ + return _isnull(obj) + + +def _isnull_new(obj): + if is_scalar(obj): + return lib.checknull(obj) + # hack (for now) because MI registers as ndarray + elif isinstance(obj, ABCMultiIndex): + raise NotImplementedError("isnull is not defined for MultiIndex") + elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): + return _isnull_ndarraylike(obj) + elif isinstance(obj, ABCGeneric): + return obj._constructor(obj._data.isnull(func=isnull)) + elif isinstance(obj, list) or hasattr(obj, '__array__'): + return _isnull_ndarraylike(np.asarray(obj)) + else: + return obj is None + + +def _isnull_old(obj): + """Detect missing values. Treat None, NaN, INF, -INF as null. + + Parameters + ---------- + arr: ndarray or object value + + Returns + ------- + boolean ndarray or boolean + """ + if is_scalar(obj): + return lib.checknull_old(obj) + # hack (for now) because MI registers as ndarray + elif isinstance(obj, ABCMultiIndex): + raise NotImplementedError("isnull is not defined for MultiIndex") + elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): + return _isnull_ndarraylike_old(obj) + elif isinstance(obj, ABCGeneric): + return obj._constructor(obj._data.isnull(func=_isnull_old)) + elif isinstance(obj, list) or hasattr(obj, '__array__'): + return _isnull_ndarraylike_old(np.asarray(obj)) + else: + return obj is None + + +_isnull = _isnull_new + + +def _use_inf_as_null(key): + """Option change callback for null/inf behaviour + Choose which replacement for numpy.isnan / -numpy.isfinite is used. 
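
# The caveat is_hashable documents above, reproduced directly (on Python 3
# the ABC lives in collections.abc): a tuple containing a list passes the
# isinstance test but fails an actual hash() call.
import collections.abc

a = ([],)
print(isinstance(a, collections.abc.Hashable))  # True, misleadingly
try:
    hash(a)
except TypeError:
    print('not actually hashable')
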
+ + Parameters + ---------- + flag: bool + True means treat None, NaN, INF, -INF as null (old way), + False means None and NaN are null, but INF, -INF are not null + (new way). + + Notes + ----- + This approach to setting global module values is discussed and + approved here: + + * http://stackoverflow.com/questions/4859217/ + programmatically-creating-variables-in-python/4859312#4859312 + """ + from pandas.core.config import get_option + flag = get_option(key) + if flag: + globals()['_isnull'] = _isnull_old + else: + globals()['_isnull'] = _isnull_new + + +def _isnull_ndarraylike(obj): + + values = getattr(obj, 'values', obj) + dtype = values.dtype + + if is_string_dtype(dtype): + if is_categorical_dtype(values): + from pandas import Categorical + if not isinstance(values, Categorical): + values = values.values + result = values.isnull() + else: + + # Working around NumPy ticket 1542 + shape = values.shape + + if is_string_like_dtype(dtype): + result = np.zeros(values.shape, dtype=bool) + else: + result = np.empty(shape, dtype=bool) + vec = lib.isnullobj(values.ravel()) + result[...] = vec.reshape(shape) + + elif is_datetimelike(obj): + # this is the NaT pattern + result = values.view('i8') == iNaT + else: + result = np.isnan(values) + + # box + if isinstance(obj, ABCSeries): + from pandas import Series + result = Series(result, index=obj.index, name=obj.name, copy=False) + + return result + + +def _isnull_ndarraylike_old(obj): + values = getattr(obj, 'values', obj) + dtype = values.dtype + + if is_string_dtype(dtype): + # Working around NumPy ticket 1542 + shape = values.shape + + if is_string_like_dtype(dtype): + result = np.zeros(values.shape, dtype=bool) + else: + result = np.empty(shape, dtype=bool) + vec = lib.isnullobj_old(values.ravel()) + result[:] = vec.reshape(shape) + + elif dtype in _DATELIKE_DTYPES: + # this is the NaT pattern + result = values.view('i8') == iNaT + else: + result = ~np.isfinite(values) + + # box + if isinstance(obj, ABCSeries): + from pandas import Series + result = Series(result, index=obj.index, name=obj.name, copy=False) + + return result + + +def notnull(obj): + """Replacement for numpy.isfinite / -numpy.isnan which is suitable for use + on object arrays. + + Parameters + ---------- + arr : ndarray or object value + Object to check for *not*-null-ness + + Returns + ------- + isnulled : array-like of bool or bool + Array or bool indicating whether an object is *not* null or if an array + is given which of the element is *not* null. + + See also + -------- + pandas.isnull : boolean inverse of pandas.notnull + """ + res = isnull(obj) + if is_scalar(res): + return not res + return ~res + + +def is_null_datelike_scalar(other): + """ test whether the object is a null datelike, e.g. 
Nat + but guard against passing a non-scalar """ + if other is NaT or other is None: + return True + elif is_scalar(other): + + # a timedelta + if hasattr(other, 'dtype'): + return other.view('i8') == iNaT + elif is_integer(other) and other == iNaT: + return True + return isnull(other) + return False + + +def _is_na_compat(arr, fill_value=np.nan): + """ + Parameters + ---------- + arr: a numpy array + fill_value: fill value, default to np.nan + + Returns + ------- + True if we can fill using this fill_value + """ + dtype = arr.dtype + if isnull(fill_value): + return not (is_bool_dtype(dtype) or + is_integer_dtype(dtype)) + return True + + +def array_equivalent(left, right, strict_nan=False): + """ + True if two arrays, left and right, have equal non-NaN elements, and NaNs + in corresponding locations. False otherwise. It is assumed that left and + right are NumPy arrays of the same dtype. The behavior of this function + (particularly with respect to NaNs) is not defined if the dtypes are + different. + + Parameters + ---------- + left, right : ndarrays + strict_nan : bool, default False + If True, consider NaN and None to be different. + + Returns + ------- + b : bool + Returns True if the arrays are equivalent. + + Examples + -------- + >>> array_equivalent( + ... np.array([1, 2, np.nan]), + ... np.array([1, 2, np.nan])) + True + >>> array_equivalent( + ... np.array([1, np.nan, 2]), + ... np.array([1, 2, np.nan])) + False + """ + + left, right = np.asarray(left), np.asarray(right) + + # shape compat + if left.shape != right.shape: + return False + + # Object arrays can contain None, NaN and NaT. + # string dtypes must be come to this path for NumPy 1.7.1 compat + if is_string_dtype(left) or is_string_dtype(right): + + if not strict_nan: + # isnull considers NaN and None to be equivalent. + return lib.array_equivalent_object( + _ensure_object(left.ravel()), _ensure_object(right.ravel())) + + for left_value, right_value in zip(left, right): + if left_value is NaT and right_value is not NaT: + return False + + elif isinstance(left_value, float) and np.isnan(left_value): + if (not isinstance(right_value, float) or + not np.isnan(right_value)): + return False + else: + if left_value != right_value: + return False + return True + + # NaNs can occur in float and complex arrays. + if is_float_dtype(left) or is_complex_dtype(left): + return ((left == right) | (np.isnan(left) & np.isnan(right))).all() + + # numpy will will not allow this type of datetimelike vs integer comparison + elif is_datetimelike_v_numeric(left, right): + return False + + # M8/m8 + elif needs_i8_conversion(left) and needs_i8_conversion(right): + if not is_dtype_equal(left.dtype, right.dtype): + return False + + left = left.view('i8') + right = right.view('i8') + + # NaNs cannot occur otherwise. + try: + return np.array_equal(left, right) + except AttributeError: + # see gh-13388 + # + # NumPy v1.7.1 has a bug in its array_equal + # function that prevents it from correctly + # comparing two arrays with complex dtypes. 
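
# The NaN-aware float comparison used by array_equivalent above, restated on
# its own for plain float arrays:
import numpy as np

left = np.array([1.0, np.nan, 3.0])
right = np.array([1.0, np.nan, 3.0])
print(((left == right) | (np.isnan(left) & np.isnan(right))).all())  # True
print(np.array_equal(left, right))  # False: NaN != NaN under plain equality
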
+ # This bug is corrected in v1.8.0, so remove + # this try-except block as soon as we stop + # supporting NumPy versions < 1.8.0 + if not is_dtype_equal(left.dtype, right.dtype): + return False + + left = left.tolist() + right = right.tolist() + + return left == right + + +def _infer_fill_value(val): + """ + infer the fill value for the nan/NaT from the provided + scalar/ndarray/list-like if we are a NaT, return the correct dtyped + element to provide proper block construction + """ + + if not is_list_like(val): + val = [val] + val = np.array(val, copy=False) + if is_datetimelike(val): + return np.array('NaT', dtype=val.dtype) + elif is_object_dtype(val.dtype): + dtype = lib.infer_dtype(_ensure_object(val)) + if dtype in ['datetime', 'datetime64']: + return np.array('NaT', dtype=_NS_DTYPE) + elif dtype in ['timedelta', 'timedelta64']: + return np.array('NaT', dtype=_TD_DTYPE) + return np.nan + + +def _maybe_fill(arr, fill_value=np.nan): + """ + if we have a compatiable fill_value and arr dtype, then fill + """ + if _is_na_compat(arr, fill_value): + arr.fill(fill_value) + return arr + + +def na_value_for_dtype(dtype): + """ + Return a dtype compat na value + + Parameters + ---------- + dtype : string / dtype + + Returns + ------- + np.dtype or a pandas dtype + """ + dtype = pandas_dtype(dtype) + + if (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype) or + is_timedelta64_dtype(dtype)): + return NaT + elif is_float_dtype(dtype): + return np.nan + elif is_integer_dtype(dtype): + return 0 + elif is_bool_dtype(dtype): + return False + return np.nan diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 2961b2fb2241f..4442eed898b60 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -23,11 +23,14 @@ import numpy as np import pandas as pd -from pandas.core.common import (is_sequence, array_equivalent, - is_list_like, is_datetimelike_v_numeric, - is_datetimelike_v_object, - is_number, is_bool, - needs_i8_conversion, is_categorical_dtype) +from pandas.types.missing import array_equivalent +from pandas.types.common import (is_datetimelike_v_numeric, + is_datetimelike_v_object, + is_number, is_bool, + needs_i8_conversion, + is_categorical_dtype, + is_sequence, + is_list_like) from pandas.formats.printing import pprint_thing from pandas.core.algorithms import take_1d @@ -1001,17 +1004,20 @@ def assert_categorical_equal(left, right, check_dtype=True, assert_attr_equal('ordered', left, right, obj=obj) -def raise_assert_detail(obj, message, left, right): +def raise_assert_detail(obj, message, left, right, diff=None): if isinstance(left, np.ndarray): left = pprint_thing(left) if isinstance(right, np.ndarray): right = pprint_thing(right) + if diff is not None: + diff = "\n[diff]: {diff}".format(diff=diff) + msg = """{0} are different {1} [left]: {2} -[right]: {3}""".format(obj, message, left, right) +[right]: {3}{4}""".format(obj, message, left, right, diff) raise AssertionError(msg) diff --git a/pandas/util/validators.py b/pandas/util/validators.py index bbfd24df9c13e..964fa9d9b38d5 100644 --- a/pandas/util/validators.py +++ b/pandas/util/validators.py @@ -3,6 +3,8 @@ for validating data or function arguments """ +from pandas.types.common import is_bool + def _check_arg_length(fname, args, max_fname_arg_count, compat_args): """ @@ -35,8 +37,6 @@ def _check_for_default_values(fname, arg_val_dict, compat_args): checked that arg_val_dict.keys() is a subset of compat_args """ - from pandas.core.common import is_bool - for key in arg_val_dict: # try checking equality 
directly with '=' operator, # as comparison may have been overridden for the left
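
# A minimal standalone sketch of the message layout raise_assert_detail now
# produces when the new optional ``diff`` argument is passed (illustrative
# only, not the pandas.util.testing implementation):
def raise_assert_detail_sketch(obj, message, left, right, diff=None):
    extra = '' if diff is None else '\n[diff]: {0}'.format(diff)
    raise AssertionError('{0} are different\n\n{1}\n[left]: {2}\n'
                         '[right]: {3}{4}'.format(obj, message, left,
                                                  right, extra))

try:
    raise_assert_detail_sketch('numpy array', 'values differ',
                               [1, 2, 3], [1, 2, 4], diff=1)
except AssertionError as exc:
    print(exc)
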