@@ -5702,6 +5702,7 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
5702
5702
to_datetime : Convert argument to datetime.
5703
5703
to_timedelta : Convert argument to timedelta.
5704
5704
to_numeric : Convert argument to numeric type.
5705
+ convert_dtypes : Convert argument to best possible dtype.
5705
5706
5706
5707
Examples
5707
5708
--------
@@ -5730,6 +5731,142 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
5730
5731
)
5731
5732
).__finalize__ (self )
5732
5733
5734
def convert_dtypes(
    self: FrameOrSeries,
    infer_objects: bool_t = True,
    convert_string: bool_t = True,
    convert_integer: bool_t = True,
    convert_boolean: bool_t = True,
) -> FrameOrSeries:
    """
    Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.

    .. versionadded:: 1.1.0

    Parameters
    ----------
    infer_objects : bool, default True
        Whether object dtypes should be converted to the best possible types.
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, default True
        Whether object dtypes should be converted to ``BooleanDtype()``.

    Returns
    -------
    Series or DataFrame
        Copy of input object with new dtype. A DataFrame without any columns
        has nothing to convert and is returned as a plain copy.

    See Also
    --------
    infer_objects : Infer dtypes of objects.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    to_numeric : Convert argument to a numeric type.

    Notes
    -----
    By default, ``convert_dtypes`` will attempt to convert a Series (or each
    Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
    ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is
    possible to turn off individual conversions to ``StringDtype``, the integer
    extension types or ``BooleanDtype``, respectively.

    For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
    rules as during normal Series/DataFrame construction. Then, if possible,
    convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension
    type, otherwise leave as ``object``.

    If the dtype is integer, convert to an appropriate integer extension type.

    If the dtype is numeric, and consists of all integers, convert to an
    appropriate integer extension type.

    In the future, as new dtypes are added that support ``pd.NA``, the results
    of this method will change to support those new dtypes.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
    ...         "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
    ...         "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
    ...         "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
    ...         "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
    ...         "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
    ...     }
    ... )

    Start with a DataFrame with default dtypes.

    >>> df
       a  b      c    d     e      f
    0  1  x   True    h  10.0    NaN
    1  2  y  False    i   NaN  100.5
    2  3  z    NaN  NaN  20.0  200.0

    >>> df.dtypes
    a      int32
    b     object
    c     object
    d     object
    e    float64
    f    float64
    dtype: object

    Convert the DataFrame to use best possible dtypes.

    >>> dfn = df.convert_dtypes()
    >>> dfn
       a  b      c     d     e      f
    0  1  x   True     h    10    NaN
    1  2  y  False     i  <NA>  100.5
    2  3  z   <NA>  <NA>    20  200.0

    >>> dfn.dtypes
    a      Int32
    b     string
    c    boolean
    d     string
    e      Int64
    f    float64
    dtype: object

    Start with a Series of strings and missing data represented by ``np.nan``.

    >>> s = pd.Series(["a", "b", np.nan])
    >>> s
    0      a
    1      b
    2    NaN
    dtype: object

    Obtain a Series with dtype ``StringDtype``.

    >>> s.convert_dtypes()
    0       a
    1       b
    2    <NA>
    dtype: string
    """
    # A Series is converted directly by its own per-column implementation.
    if self.ndim == 1:
        return self._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )

    # A DataFrame is converted column by column and then reassembled.
    results = [
        col._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )
        for _, col in self.items()
    ]
    if not results:
        # pd.concat raises ValueError when given no objects, so a frame with
        # no columns is handed back as a copy (keeping its index) instead.
        return self.copy()
    return pd.concat(results, axis=1, copy=False)
5869
+
5733
5870
# ----------------------------------------------------------------------
5734
5871
# Filling NA's
5735
5872
0 commit comments