From c37272d0f72b5925aeaf17642def6c752ce8ace0 Mon Sep 17 00:00:00 2001 From: pckSF Date: Tue, 27 Apr 2021 20:03:26 +0100 Subject: [PATCH 1/9] TYP: Overload concat --- pandas/core/reshape/concat.py | 83 ++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 15 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index b3b453ea6355a..6d868d4453a38 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -15,7 +15,10 @@ import numpy as np -from pandas._typing import FrameOrSeriesUnion +from pandas._typing import ( + Axis, + FrameOrSeriesUnion, +) from pandas.util._decorators import cache_readonly from pandas.core.dtypes.concat import concat_compat @@ -42,6 +45,8 @@ from pandas.core.internals import concatenate_managers if TYPE_CHECKING: + from typing import Literal + from pandas import ( DataFrame, Series, @@ -55,15 +60,63 @@ @overload def concat( objs: Iterable[DataFrame] | Mapping[Hashable, DataFrame], - axis=0, - join: str = "outer", - ignore_index: bool = False, - keys=None, - levels=None, - names=None, - verify_integrity: bool = False, - sort: bool = False, - copy: bool = True, + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame: + ... + + +@overload +def concat( + objs: Iterable[DataFrame] | Mapping[Hashable, DataFrame], + axis: Literal[1, "columns"], + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame: + ... + + +@overload +def concat( + objs: Iterable[Series] | Mapping[Hashable, Series], + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> Series: + ... + + +@overload +def concat( + objs: Iterable[Series] | Mapping[Hashable, Series], + axis: Literal[1, "columns"], + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., ) -> DataFrame: ... @@ -71,9 +124,9 @@ def concat( @overload def concat( objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], - axis=0, - join: str = "outer", - ignore_index: bool = False, + axis: Axis = ..., + join: str = ..., + ignore_index: bool = ..., keys=None, levels=None, names=None, @@ -86,8 +139,8 @@ def concat( def concat( objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], - axis=0, - join="outer", + axis: Axis = 0, + join: str = "outer", ignore_index: bool = False, keys=None, levels=None, From 7bf21ec3fba57821893c2db9f931526ef9bf9726 Mon Sep 17 00:00:00 2001 From: pckSF Date: Sat, 1 May 2021 20:41:11 +0100 Subject: [PATCH 2/9] Fix NDFrame columns overload --- pandas/core/reshape/concat.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 6d868d4453a38..3989abd21e134 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -121,6 +121,38 @@ def concat( ... +@overload +def concat( + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], + axis: Literal[0, "index"] = ..., + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> FrameOrSeriesUnion: + ... + + +@overload +def concat( + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], + axis: Literal[1, "columns"], + join: str = ..., + ignore_index: bool = ..., + keys=..., + levels=..., + names=..., + verify_integrity: bool = ..., + sort: bool = ..., + copy: bool = ..., +) -> DataFrame: + ... + + @overload def concat( objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], From ee491ac8b7f36aad5bd0e851ca8f6428156ee3c1 Mon Sep 17 00:00:00 2001 From: pckSF Date: Sun, 11 Jul 2021 20:42:21 +0100 Subject: [PATCH 3/9] Fix FrameOrSeries --- pandas/core/reshape/concat.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 14a09fee0f912..aa83ef064a027 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -46,10 +46,7 @@ if TYPE_CHECKING: from typing import Literal - from pandas._typing import ( - Axis, - FrameOrSeriesUnion, - ) + from pandas._typing import Axis from pandas import ( DataFrame, @@ -137,7 +134,7 @@ def concat( verify_integrity: bool = ..., sort: bool = ..., copy: bool = ..., -) -> FrameOrSeriesUnion: +) -> DataFrame | Series: ... From 29d3da4dc82f8a013c755f7b93d53f7a8b07cd2f Mon Sep 17 00:00:00 2001 From: pckSF Date: Sat, 17 Jul 2021 23:38:57 +0100 Subject: [PATCH 4/9] Fix redundant casts --- pandas/core/groupby/generic.py | 8 ++------ pandas/core/reshape/concat.py | 3 +-- pandas/core/reshape/melt.py | 12 +++--------- pandas/core/reshape/reshape.py | 10 ++-------- 4 files changed, 8 insertions(+), 25 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 88d1baae86467..d9ae2e1c52d15 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -307,9 +307,7 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame: res_df = concat( results.values(), axis=1, keys=[key.label for key in results.keys()] ) - # error: Incompatible return value type (got "Union[DataFrame, Series]", - # expected "DataFrame") - return res_df # type: ignore[return-value] + return res_df indexed_output = {key.position: val for key, val in results.items()} output = self.obj._constructor_expanddim(indexed_output, index=None) @@ -546,9 +544,7 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: result = self.obj._constructor(dtype=np.float64) result.name = self.obj.name - # error: Incompatible return value type (got "Union[DataFrame, Series]", - # expected "Series") - return result # type: ignore[return-value] + return result def _can_use_transform_fast(self, result) -> bool: return True diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index aa83ef064a027..6c65507738234 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -8,6 +8,7 @@ TYPE_CHECKING, Hashable, Iterable, + Literal, Mapping, cast, overload, @@ -44,8 +45,6 @@ from pandas.core.internals import concatenate_managers if TYPE_CHECKING: - from typing import Literal - from pandas._typing import Axis from pandas import ( diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index acd6e540aaae3..1b217a592987f 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -1,10 +1,7 @@ from __future__ import annotations import re -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import TYPE_CHECKING import warnings import numpy as np @@ -34,10 +31,7 @@ from pandas.core.tools.numeric import to_numeric if TYPE_CHECKING: - from pandas import ( - DataFrame, - Series, - ) + from pandas import DataFrame @Appender(_shared_docs["melt"] % {"caller": "pd.melt(df, ", "other": "DataFrame.melt"}) @@ -136,7 +130,7 @@ def melt( for col in id_vars: id_data = frame.pop(col) if is_extension_array_dtype(id_data): - id_data = cast("Series", concat([id_data] * K, ignore_index=True)) + id_data = concat([id_data] * K, ignore_index=True) else: id_data = np.tile(id_data._values, K) mdata[col] = id_data diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 0edb150bdc273..377254580cdd0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1,10 +1,7 @@ from __future__ import annotations import itertools -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import TYPE_CHECKING import numpy as np @@ -1048,10 +1045,7 @@ def get_empty_frame(data) -> DataFrame: ) sparse_series.append(Series(data=sarr, index=index, name=col)) - out = concat(sparse_series, axis=1, copy=False) - # TODO: overload concat with Literal for axis - out = cast(DataFrame, out) - return out + return concat(sparse_series, axis=1, copy=False) else: # take on axis=1 + transpose to ensure ndarray layout is column-major From 93143b43ecc2572efa9bfbc19130e5ba1096a4ae Mon Sep 17 00:00:00 2001 From: pckSF Date: Mon, 19 Jul 2021 20:39:23 +0100 Subject: [PATCH 5/9] Import Axis outside of TYPE_CHECKING --- pandas/core/reshape/concat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 6c65507738234..20e130e9aa62a 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -16,6 +16,7 @@ import numpy as np +from pandas._typing import Axis from pandas.util._decorators import ( cache_readonly, deprecate_nonkeyword_arguments, @@ -45,8 +46,6 @@ from pandas.core.internals import concatenate_managers if TYPE_CHECKING: - from pandas._typing import Axis - from pandas import ( DataFrame, Series, From 87832146f91a135967cc0636520083837aa65c3c Mon Sep 17 00:00:00 2001 From: pckSF Date: Mon, 19 Jul 2021 21:52:41 +0100 Subject: [PATCH 6/9] Remove redundant overloads --- pandas/core/reshape/concat.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 20e130e9aa62a..8ea5ed9ef6c8f 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -72,22 +72,6 @@ def concat( ... -@overload -def concat( - objs: Iterable[DataFrame] | Mapping[Hashable, DataFrame], - axis: Literal[1, "columns"], - join: str = ..., - ignore_index: bool = ..., - keys=..., - levels=..., - names=..., - verify_integrity: bool = ..., - sort: bool = ..., - copy: bool = ..., -) -> DataFrame: - ... - - @overload def concat( objs: Iterable[Series] | Mapping[Hashable, Series], @@ -104,22 +88,6 @@ def concat( ... -@overload -def concat( - objs: Iterable[Series] | Mapping[Hashable, Series], - axis: Literal[1, "columns"], - join: str = ..., - ignore_index: bool = ..., - keys=..., - levels=..., - names=..., - verify_integrity: bool = ..., - sort: bool = ..., - copy: bool = ..., -) -> DataFrame: - ... - - @overload def concat( objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], From eacfe5b0a66002572173365571d9ada24eb65495 Mon Sep 17 00:00:00 2001 From: pckSF Date: Mon, 26 Jul 2021 00:01:32 +0100 Subject: [PATCH 7/9] Mark mypy/issues/8354 return type issues --- pandas/core/arrays/categorical.py | 2 +- pandas/core/generic.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3fdb52a73dc3e..57e7c0af52c91 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2315,7 +2315,7 @@ def describe(self): from pandas.core.reshape.concat import concat result = concat([counts, freqs], axis=1) - result.columns = ["counts", "freqs"] + result.columns = Index(["counts", "freqs"]) result.index.name = "categories" return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3b8458fac2942..6205463aad5ad 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5749,7 +5749,8 @@ def astype( # GH 19920: retain column metadata after concat result = concat(results, axis=1, copy=False) result.columns = self.columns - return result + # https://github.com/python/mypy/issues/8354 + return cast(FrameOrSeries, result) @final def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries: @@ -6112,7 +6113,8 @@ def convert_dtypes( for col_name, col in self.items() ] if len(results) > 0: - return concat(results, axis=1, copy=False) + # https://github.com/python/mypy/issues/8354 + return cast(FrameOrSeries, concat(results, axis=1, copy=False)) else: return self.copy() From 1f71cef45b01374d0a093bcbb0879d446d42de58 Mon Sep 17 00:00:00 2001 From: pckSF Date: Wed, 11 Aug 2021 15:09:11 +0200 Subject: [PATCH 8/9] Fix missing imports --- pandas/core/arrays/categorical.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 1e46703350725..1f3197597a83c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2308,6 +2308,7 @@ def describe(self): counts = self.value_counts(dropna=False) freqs = counts / counts.sum() + from pandas import Index from pandas.core.reshape.concat import concat result = concat([counts, freqs], axis=1) From 8d523c3969684f8e3537e674a61b2b2c09ca3ae6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 21 Aug 2021 18:16:57 +0100 Subject: [PATCH 9/9] noop