From 475ed6de720155589ab1902315ce479c447c9c8d Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Wed, 10 Jul 2019 16:59:54 +0800 Subject: [PATCH 01/12] Use IntergerArray for integer arrays with null --- pandas/_libs/lib.pyx | 26 +++++++++++++++++++++----- pandas/core/frame.py | 3 ++- pandas/core/internals/construction.py | 16 +++++++++++----- pandas/io/json/_normalize.py | 5 +++-- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a14efd3313eaf..62b0c4e62268d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -971,6 +971,7 @@ cdef class Seen: bint nat_ # seen nat bint bool_ # seen_bool bint null_ # seen_null + bint nan_ # seen_np.nan bint uint_ # seen_uint (unsigned integer) bint sint_ # seen_sint (signed integer) bint float_ # seen_float @@ -995,6 +996,7 @@ cdef class Seen: self.nat_ = 0 self.bool_ = 0 self.null_ = 0 + self.nan_ = 0 self.uint_ = 0 self.sint_ = 0 self.float_ = 0 @@ -1956,7 +1958,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, @cython.wraparound(False) def maybe_convert_objects(ndarray[object] objects, bint try_float=0, bint safe=0, bint convert_datetime=0, - bint convert_timedelta=0): + bint convert_timedelta=0, to_integer_array=False): """ Type inference function-- convert object array to proper dtype """ @@ -1980,6 +1982,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, ints = np.empty(n, dtype='i8') uints = np.empty(n, dtype='u8') bools = np.empty(n, dtype=np.uint8) + mask = np.full(n, False) if convert_datetime: datetimes = np.empty(n, dtype='M8[ns]') @@ -1997,6 +2000,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if val is None: seen.null_ = 1 floats[i] = complexes[i] = fnan + mask[i] = True elif val is NaT: seen.nat_ = 1 if convert_datetime: @@ -2006,6 +2010,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if not (convert_datetime or convert_timedelta): seen.object_ = 1 
break + elif val is np.nan: + seen.nan_ = 1 + mask[i] = True + floats[i] = complexes[i] = val elif util.is_bool_object(val): seen.bool_ = 1 bools[i] = val @@ -2087,11 +2095,19 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if not seen.object_: if not safe: - if seen.null_: + if seen.null_ or seen.nan_: if seen.is_float_or_complex: if seen.complex_: return complexes - elif seen.float_ or seen.int_: + elif seen.float_: + return floats + elif seen.int_: + if to_integer_array: + from pandas.core.arrays import IntegerArray + return IntegerArray(ints, mask) + else: + return floats + elif seen.nan_: return floats else: if not seen.bool_: @@ -2130,7 +2146,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if seen.complex_: if not seen.int_: return complexes - elif seen.float_: + elif seen.float_ or seen.nan_: if not seen.int_: return floats else: @@ -2154,7 +2170,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if seen.complex_: if not seen.int_: return complexes - elif seen.float_: + elif seen.float_ or seen.nan_: if not seen.int_: return floats elif seen.int_: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7e3c2200dbabc..fb9ee5c285092 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -461,7 +461,8 @@ def __init__( if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: if is_named_tuple(data[0]) and columns is None: columns = data[0]._fields - arrays, columns = to_arrays(data, columns, dtype=dtype) + arrays, columns = to_arrays(data, columns, dtype=dtype, + to_integer_array=to_integer_array) columns = ensure_index(columns) # set the index diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 05a2803b3fc2f..6ed725fd85369 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -429,7 +429,8 @@ def _get_axes(N, K, index, columns): # Conversion of Inputs to Arrays -def 
to_arrays(data, columns, coerce_float=False, dtype=None): +def to_arrays(data, columns, coerce_float=False, dtype=None, + to_integer_array=False): """ Return list of arrays, columns. """ @@ -456,7 +457,8 @@ def to_arrays(data, columns, coerce_float=False, dtype=None): return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) elif isinstance(data[0], abc.Mapping): return _list_of_dict_to_arrays( - data, columns, coerce_float=coerce_float, dtype=dtype + data, columns, coerce_float=coerce_float, dtype=dtype, + to_integer_array=to_integer_array ) elif isinstance(data[0], ABCSeries): return _list_of_series_to_arrays( @@ -548,6 +550,7 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): tuple arrays, columns """ + if columns is None: gen = (list(x.keys()) for x in data) types = (dict, OrderedDict) if PY36 else OrderedDict @@ -560,11 +563,13 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): content = list(lib.dicts_to_array(data, list(columns)).T) return _convert_object_array( - content, columns, dtype=dtype, coerce_float=coerce_float + content, columns, dtype=dtype, coerce_float=coerce_float, + to_integer_array=to_integer_array ) -def _convert_object_array(content, columns, coerce_float=False, dtype=None): +def _convert_object_array(content, columns, coerce_float=False, dtype=None, + to_integer_array=False): if columns is None: columns = ibase.default_index(len(content)) else: @@ -578,7 +583,8 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): # provide soft conversion of object dtypes def convert(arr): if dtype != object and dtype != np.object: - arr = lib.maybe_convert_objects(arr, try_float=coerce_float) + arr = lib.maybe_convert_objects(arr, try_float=coerce_float, + to_integer_array=to_integer_array) arr = maybe_cast_to_datetime(arr, dtype) return arr diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 702241bde2b34..ce51d157d21a6 100644 
--- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -117,6 +117,7 @@ def json_normalize( errors: Optional[str] = "raise", sep: str = ".", max_level: Optional[int] = None, + to_integer_array: Optional[bool] = False ): """ Normalize semi-structured JSON data into a flat table. @@ -255,7 +256,7 @@ def _pull_field(js, spec): # TODO: handle record value which are lists, at least error # reasonably data = nested_to_record(data, sep=sep, max_level=max_level) - return DataFrame(data) + return DataFrame(data, to_integer_array=to_integer_array) elif not isinstance(record_path, list): record_path = [record_path] @@ -315,7 +316,7 @@ def _recursive_extract(data, path, seen_meta, level=0): _recursive_extract(data, record_path, {}, level=0) - result = DataFrame(records) + result = DataFrame(records, to_integer_array=to_integer_array) if record_prefix is not None: result = result.rename(columns=lambda x: "{p}{c}".format(p=record_prefix, c=x)) From f47b60f31a87b8987f1d9a22a91fb1f5c827bec9 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Mon, 7 Oct 2019 12:50:50 +0800 Subject: [PATCH 02/12] Reformat with black --- pandas/core/frame.py | 5 +++-- pandas/core/internals/construction.py | 28 +++++++++++++++++---------- pandas/io/json/_normalize.py | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fb9ee5c285092..d37276175f2e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -461,8 +461,9 @@ def __init__( if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: if is_named_tuple(data[0]) and columns is None: columns = data[0]._fields - arrays, columns = to_arrays(data, columns, dtype=dtype, - to_integer_array=to_integer_array) + arrays, columns = to_arrays( + data, columns, dtype=dtype, to_integer_array=to_integer_array + ) columns = ensure_index(columns) # set the index diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 
6ed725fd85369..e1890ca35479a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -429,8 +429,7 @@ def _get_axes(N, K, index, columns): # Conversion of Inputs to Arrays -def to_arrays(data, columns, coerce_float=False, dtype=None, - to_integer_array=False): +def to_arrays(data, columns, coerce_float=False, dtype=None, to_integer_array=False): """ Return list of arrays, columns. """ @@ -457,8 +456,11 @@ def to_arrays(data, columns, coerce_float=False, dtype=None, return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) elif isinstance(data[0], abc.Mapping): return _list_of_dict_to_arrays( - data, columns, coerce_float=coerce_float, dtype=dtype, - to_integer_array=to_integer_array + data, + columns, + coerce_float=coerce_float, + dtype=dtype, + to_integer_array=to_integer_array, ) elif isinstance(data[0], ABCSeries): return _list_of_series_to_arrays( @@ -530,6 +532,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): + """Convert list of dicts to numpy arrays if `columns` is not passed, column names are inferred from the records @@ -563,13 +566,17 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): content = list(lib.dicts_to_array(data, list(columns)).T) return _convert_object_array( - content, columns, dtype=dtype, coerce_float=coerce_float, - to_integer_array=to_integer_array + content, + columns, + dtype=dtype, + coerce_float=coerce_float, + to_integer_array=to_integer_array, ) -def _convert_object_array(content, columns, coerce_float=False, dtype=None, - to_integer_array=False): +def _convert_object_array( + content, columns, coerce_float=False, dtype=None, to_integer_array=False +): if columns is None: columns = ibase.default_index(len(content)) else: @@ -583,8 +590,9 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None, # provide soft 
conversion of object dtypes def convert(arr): if dtype != object and dtype != np.object: - arr = lib.maybe_convert_objects(arr, try_float=coerce_float, - to_integer_array=to_integer_array) + arr = lib.maybe_convert_objects( + arr, try_float=coerce_float, to_integer_array=to_integer_array + ) arr = maybe_cast_to_datetime(arr, dtype) return arr diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index ce51d157d21a6..a8a76f81e3341 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -117,7 +117,7 @@ def json_normalize( errors: Optional[str] = "raise", sep: str = ".", max_level: Optional[int] = None, - to_integer_array: Optional[bool] = False + to_integer_array: bool = False, ): """ Normalize semi-structured JSON data into a flat table. From 57c613af3c09cd9d49a59523b8d3fc8aca29f36c Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Sun, 27 Oct 2019 16:19:07 +0800 Subject: [PATCH 03/12] Remove to_integer_array keyword in frame.py --- pandas/core/frame.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d37276175f2e9..c5b6995e5d3bd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -461,9 +461,7 @@ def __init__( if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: if is_named_tuple(data[0]) and columns is None: columns = data[0]._fields - arrays, columns = to_arrays( - data, columns, dtype=dtype, to_integer_array=to_integer_array - ) + arrays, columns = to_arrays(data, columns, dtype=dtype) columns = ensure_index(columns) # set the index @@ -859,9 +857,9 @@ def style(self): ... index=['panda', 'polar', 'koala']) >>> df species population - panda bear 1864 - polar bear 22000 - koala marsupial 80000 + panda bear 1864 + polar bear 22000 + koala marsupial 80000 >>> for label, content in df.items(): ... print('label:', label) ... 
print('content:', content, sep='\n') From b5698c021572e0333d82bd67177c35dd9be27f11 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Sun, 27 Oct 2019 16:25:31 +0800 Subject: [PATCH 04/12] Remove to_integer_array keyword in internals/construction.py --- pandas/core/internals/construction.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index e1890ca35479a..bb62db431ac73 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -429,7 +429,7 @@ def _get_axes(N, K, index, columns): # Conversion of Inputs to Arrays -def to_arrays(data, columns, coerce_float=False, dtype=None, to_integer_array=False): +def to_arrays(data, columns, coerce_float=False, dtype=None): """ Return list of arrays, columns. """ @@ -456,11 +456,7 @@ def to_arrays(data, columns, coerce_float=False, dtype=None, to_integer_array=Fa return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) elif isinstance(data[0], abc.Mapping): return _list_of_dict_to_arrays( - data, - columns, - coerce_float=coerce_float, - dtype=dtype, - to_integer_array=to_integer_array, + data, columns, coerce_float=coerce_float, dtype=dtype ) elif isinstance(data[0], ABCSeries): return _list_of_series_to_arrays( @@ -532,7 +528,6 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): - """Convert list of dicts to numpy arrays if `columns` is not passed, column names are inferred from the records @@ -566,17 +561,11 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): content = list(lib.dicts_to_array(data, list(columns)).T) return _convert_object_array( - content, - columns, - dtype=dtype, - coerce_float=coerce_float, - to_integer_array=to_integer_array, + content, columns, dtype=dtype, coerce_float=coerce_float ) -def 
_convert_object_array( - content, columns, coerce_float=False, dtype=None, to_integer_array=False -): +def _convert_object_array(content, columns, coerce_float=False, dtype=None): if columns is None: columns = ibase.default_index(len(content)) else: @@ -590,9 +579,7 @@ def _convert_object_array( # provide soft conversion of object dtypes def convert(arr): if dtype != object and dtype != np.object: - arr = lib.maybe_convert_objects( - arr, try_float=coerce_float, to_integer_array=to_integer_array - ) + arr = lib.maybe_convert_objects(arr, try_float=coerce_float) arr = maybe_cast_to_datetime(arr, dtype) return arr From e2b98036748a079cbed2d6e5acf89959f38d6d3e Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Sun, 27 Oct 2019 16:27:17 +0800 Subject: [PATCH 05/12] Remove to_integer_array keyword in io/json/_normalize.py --- pandas/io/json/_normalize.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index a8a76f81e3341..702241bde2b34 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -117,7 +117,6 @@ def json_normalize( errors: Optional[str] = "raise", sep: str = ".", max_level: Optional[int] = None, - to_integer_array: bool = False, ): """ Normalize semi-structured JSON data into a flat table. 
@@ -256,7 +255,7 @@ def _pull_field(js, spec): # TODO: handle record value which are lists, at least error # reasonably data = nested_to_record(data, sep=sep, max_level=max_level) - return DataFrame(data, to_integer_array=to_integer_array) + return DataFrame(data) elif not isinstance(record_path, list): record_path = [record_path] @@ -316,7 +315,7 @@ def _recursive_extract(data, path, seen_meta, level=0): _recursive_extract(data, record_path, {}, level=0) - result = DataFrame(records, to_integer_array=to_integer_array) + result = DataFrame(records) if record_prefix is not None: result = result.rename(columns=lambda x: "{p}{c}".format(p=record_prefix, c=x)) From 63d4bdd936a2b750da68fd37056d4c78e8ae623a Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Sun, 27 Oct 2019 16:36:49 +0800 Subject: [PATCH 06/12] Refactor keyword 'to_integer_array' to 'convert_to_nullable_integer' --- pandas/_libs/lib.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 62b0c4e62268d..97b2393e8b339 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1958,7 +1958,8 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, @cython.wraparound(False) def maybe_convert_objects(ndarray[object] objects, bint try_float=0, bint safe=0, bint convert_datetime=0, - bint convert_timedelta=0, to_integer_array=False): + bint convert_timedelta=0, + bint convert_to_nullable_integer=0): """ Type inference function-- convert object array to proper dtype """ @@ -2102,7 +2103,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, elif seen.float_: return floats elif seen.int_: - if to_integer_array: + if convert_to_nullable_integer: from pandas.core.arrays import IntegerArray return IntegerArray(ints, mask) else: From a74e473be5559d68d0ace1dbd497c429e12fc326 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Sun, 27 Oct 2019 19:56:42 +0800 Subject: [PATCH 07/12] Add test for IntegerArray conversion --- 
pandas/tests/dtypes/test_inference.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 62fb118f719e3..8973f5d2d1aee 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -552,6 +552,14 @@ def test_maybe_convert_objects_datetime(self): out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) tm.assert_numpy_array_equal(out, exp) + def test_maybe_convert_objects_nullable_integer(self): + # GH27335 + arr = np.array([2, np.NaN], dtype=object) + result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) + from pandas.core.arrays import IntegerArray + exp = IntegerArray(np.array([2, 0], dtype='i8'), np.array([False, True])) + tm.assert_equal(result, exp) + def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) From 7672fa68e8ab19012886b73d8b0b81082e3ab8f3 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Sun, 27 Oct 2019 20:54:38 +0800 Subject: [PATCH 08/12] Reformat with black --- pandas/tests/dtypes/test_inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 8973f5d2d1aee..072d14a3c2db0 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -557,7 +557,8 @@ def test_maybe_convert_objects_nullable_integer(self): arr = np.array([2, np.NaN], dtype=object) result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) from pandas.core.arrays import IntegerArray - exp = IntegerArray(np.array([2, 0], dtype='i8'), np.array([False, True])) + + exp = IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])) tm.assert_equal(result, exp) def test_mixed_dtypes_remain_object_array(self): From c24de12df633538c1c6092490a3993c1d981ae11 Mon Sep 17 00:00:00 2001 From: Jiang Yue 
Date: Sat, 2 Nov 2019 23:58:07 +0800 Subject: [PATCH 09/12] Parameterize test and use assert_extension_array_equal instead of assert_equal --- pandas/tests/dtypes/test_inference.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 072d14a3c2db0..8d612e2048b88 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -18,6 +18,7 @@ from pandas._libs import iNaT, lib, missing as libmissing import pandas.util._test_decorators as td +from pandas.core.arrays import IntegerArray from pandas.core.dtypes import inference from pandas.core.dtypes.common import ( ensure_categorical, @@ -552,14 +553,19 @@ def test_maybe_convert_objects_datetime(self): out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) tm.assert_numpy_array_equal(out, exp) - def test_maybe_convert_objects_nullable_integer(self): + @pytest.mark.parametrize( + "exp", + [ + IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])), + IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])), + ], + ) + def test_maybe_convert_objects_nullable_integer(self, exp): # GH27335 arr = np.array([2, np.NaN], dtype=object) result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) - from pandas.core.arrays import IntegerArray - exp = IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])) - tm.assert_equal(result, exp) + tm.assert_extension_array_equal(result, exp) def test_mixed_dtypes_remain_object_array(self): # GH14956 From f071bf69ff1a899e82489c20cc6df1646708f542 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Sun, 3 Nov 2019 12:29:26 +0800 Subject: [PATCH 10/12] Sort import sequence with isort --- pandas/tests/dtypes/test_inference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 
8d612e2048b88..a5d183d540520 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -18,7 +18,6 @@ from pandas._libs import iNaT, lib, missing as libmissing import pandas.util._test_decorators as td -from pandas.core.arrays import IntegerArray from pandas.core.dtypes import inference from pandas.core.dtypes.common import ( ensure_categorical, @@ -52,7 +51,9 @@ Timestamp, isna, ) -import pandas.util.testing as tm + +from pandas.core.arrays import IntegerArray +from pandas.util import testing as tm @pytest.fixture(params=[True, False], ids=str) From e8591ef3f6c701ca753837cd0927067d8b9799a8 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Mon, 11 Nov 2019 21:03:19 +0800 Subject: [PATCH 11/12] Add doc-string for maybe_convert_objects --- pandas/_libs/lib.pyx | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 97b2393e8b339..5b27c953450e3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1962,7 +1962,33 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, bint convert_to_nullable_integer=0): """ Type inference function-- convert object array to proper dtype + + Parameters + ---------- + objects : ndarray + Array of object elements to convert. + try_float : bool, default False + If an array-like object containing only float or NaN values is + encountered, whether to convert and return an array of float dtype. + safe : bool, default False + Whether to upcast numeric type (e.g. int cast to float). If set to + True, no upcasting will be performed. + convert_datetime : bool, default False + If an array-like object containing only datetime values or NaT is + encountered, whether to convert and return an array of M8[ns] dtype. + convert_timedelta : bool, default False + If an array-like object containing only timedelta values or NaT is + encountered, whether to convert and return an array of m8[ns] dtype.
+ convert_to_nullable_integer : bool, default False + If an array-like object containing only integer values (and NaN) is + encountered, whether to convert and return an IntegerArray. + + Returns + ------- + array : array of converted object values to more specific dtypes if + applicable """ + cdef: Py_ssize_t i, n ndarray[float64_t] floats From cc179be1be8a249df3c7e429e250989c50e2b10e Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Mon, 11 Nov 2019 22:10:35 +0800 Subject: [PATCH 12/12] Solve unwanted import pattern --- pandas/tests/dtypes/test_inference.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index a5d183d540520..0408c78ac1536 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -51,9 +51,8 @@ Timestamp, isna, ) - from pandas.core.arrays import IntegerArray -from pandas.util import testing as tm +import pandas.util.testing as tm @pytest.fixture(params=[True, False], ids=str)