From 439d17ae739f3c465867f8276067b428af730473 Mon Sep 17 00:00:00 2001
From: tp
Date: Thu, 17 Sep 2020 19:01:33 +0100
Subject: [PATCH 1/4] PERF: construct DataFrame with string array and dtype=str

---
 pandas/core/dtypes/cast.py            |  2 +-
 pandas/core/internals/construction.py | 20 +++++++++++---------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 05759ffb43dde..747862348d754 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1618,7 +1618,7 @@ def construct_1d_ndarray_preserving_na(
     array(['1.0', '2.0', None], dtype=object)
     """
 
-    if dtype is not None and dtype.kind == "U":
+    if is_string_dtype(dtype):
         subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy)
     else:
         subarr = np.array(values, dtype=dtype, copy=copy)
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 2d4163e0dee89..3f82035be0e67 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -13,6 +13,7 @@
 
 from pandas.core.dtypes.cast import (
     construct_1d_arraylike_from_scalar,
+    construct_1d_ndarray_preserving_na,
     maybe_cast_to_datetime,
     maybe_convert_platform,
     maybe_infer_to_datetimelike,
@@ -189,15 +190,16 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
     # the dtypes will be coerced to a single dtype
     values = _prep_ndarray(values, copy=copy)
 
-    if dtype is not None:
-        if not is_dtype_equal(values.dtype, dtype):
-            try:
-                values = values.astype(dtype)
-            except Exception as orig:
-                # e.g. ValueError when trying to cast object dtype to float64
-                raise ValueError(
-                    f"failed to cast to '{dtype}' (Exception was: {orig})"
-                ) from orig
+    if not is_dtype_equal(values.dtype, dtype):
+        try:
+            values = construct_1d_ndarray_preserving_na(
+                values.ravel(), dtype=dtype, copy=False
+            ).reshape(values.shape)
+        except Exception as orig:
+            # e.g. ValueError when trying to cast object dtype to float64
+            raise ValueError(
+                f"failed to cast to '{dtype}' (Exception was: {orig})"
+            ) from orig
 
     # _prep_ndarray ensures that values.ndim == 2 at this point
     index, columns = _get_axes(
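A rough sketch of the construction path this patch targets: a 2-D NumPy array of
strings passed to DataFrame with dtype=str now goes through
construct_1d_ndarray_preserving_na on the ravelled values (and hence
lib.ensure_string_array) instead of a plain ndarray.astype. The array contents
and sizes below are illustrative assumptions, not the benchmark data.

    import numpy as np
    import pandas as pd

    # a 2-D object array of strings, similar in shape to the new ASV benchmark
    arr = np.array(["x" * 10] * 100_000, dtype=object).reshape(50_000, 2)

    # with this patch, the dtype=str cast runs through ensure_string_array on
    # the ravelled values rather than ndarray.astype on the object array
    df = pd.DataFrame(arr, dtype=str)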
From f1d66cb6163f655cb0a4a0aefee7cb6aa8cb1669 Mon Sep 17 00:00:00 2001
From: tp
Date: Thu, 17 Sep 2020 19:09:22 +0100
Subject: [PATCH 2/4] add GH number

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 1286577748afa..177485c6dfe7e 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -222,7 +222,7 @@ Deprecations
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-- Performance improvements when creating Series with dtype `str` or :class:`StringDtype` from array with many string elements (:issue:`36304`, :issue:`36317`, :issue:`36325`)
+- Performance improvements when creating DataFrame or Series with dtype `str` or :class:`StringDtype` from array with many string elements (:issue:`36304`, :issue:`36317`, :issue:`36325`, :issue:`36432`)
 - Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`)
 - Performance improvements when creating :meth:`pd.Series.map` from a huge dictionary (:issue:`34717`)
 - Performance improvement in :meth:`GroupBy.transform` with the ``numba`` engine (:issue:`36240`)

From 5d1dd81d7310534bb3c7a16e65503ec5d9798aa7 Mon Sep 17 00:00:00 2001
From: tp
Date: Thu, 17 Sep 2020 19:19:32 +0100
Subject: [PATCH 3/4] fix dtype issues

---
 pandas/core/dtypes/cast.py            | 2 +-
 pandas/core/internals/construction.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 747862348d754..05759ffb43dde 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1618,7 +1618,7 @@ def construct_1d_ndarray_preserving_na(
     array(['1.0', '2.0', None], dtype=object)
     """
 
-    if is_string_dtype(dtype):
+    if dtype is not None and dtype.kind == "U":
         subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy)
     else:
         subarr = np.array(values, dtype=dtype, copy=copy)
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 3f82035be0e67..d19a0dd8f29e3 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -190,7 +190,7 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
     # the dtypes will be coerced to a single dtype
     values = _prep_ndarray(values, copy=copy)
 
-    if not is_dtype_equal(values.dtype, dtype):
+    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
         try:
             values = construct_1d_ndarray_preserving_na(
                 values.ravel(), dtype=dtype, copy=False
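The revert to the narrower check in this patch is presumably because
is_string_dtype is looser than dtype.kind == "U": it also treats object dtype as
a string dtype, and init_ndarray can be called with dtype=None (hence the added
guard). A small illustration of the difference; my own example, not part of the
patch.

    import numpy as np
    from pandas.api.types import is_string_dtype

    # is_string_dtype also accepts object dtype ...
    print(is_string_dtype(np.dtype(object)))  # True
    # ... while the kind check only matches NumPy unicode dtypes
    print(np.dtype(object).kind == "U")       # False
    print(np.dtype(str).kind == "U")          # True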
From 256ad703a46462bf5fe2b54ac9cc01da69934142 Mon Sep 17 00:00:00 2001
From: tp
Date: Sat, 19 Sep 2020 08:14:36 +0100
Subject: [PATCH 4/4] add ASVs

---
 asv_bench/benchmarks/strings.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 2023858181baa..d8b35abb94b9d 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -13,13 +13,20 @@ class Construction:
     param_names = ["dtype"]
 
     def setup(self, dtype):
-        self.data = tm.rands_array(nchars=10 ** 5, size=10)
+        self.series_arr = tm.rands_array(nchars=10, size=10 ** 5)
+        self.frame_arr = self.series_arr.reshape((50_000, 2)).copy()
 
-    def time_construction(self, dtype):
-        Series(self.data, dtype=dtype)
+    def time_series_construction(self, dtype):
+        Series(self.series_arr, dtype=dtype)
 
-    def peakmem_construction(self, dtype):
-        Series(self.data, dtype=dtype)
+    def peakmem_series_construction(self, dtype):
+        Series(self.series_arr, dtype=dtype)
+
+    def time_frame_construction(self, dtype):
+        DataFrame(self.frame_arr, dtype=dtype)
+
+    def peakmem_frame_construction(self, dtype):
+        DataFrame(self.frame_arr, dtype=dtype)
 
 
 class Methods:
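The new benchmark methods can be smoke-tested by hand before running them under
ASV. A minimal sketch, assuming it is run from the asv_bench/benchmarks
directory so the module imports, and assuming "str" is one of the class's dtype
parameter values (the params list is not shown in this hunk).

    from strings import Construction

    bench = Construction()
    bench.setup("str")                     # "str" assumed as a dtype param value
    bench.time_series_construction("str")  # Series(self.series_arr, dtype="str")
    bench.time_frame_construction("str")   # DataFrame(self.frame_arr, dtype="str")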