From dd93ad3f18201a709fbf2cc1c528b0487dfa8b9e Mon Sep 17 00:00:00 2001
From: Charlie-XIAO <yx2436@nyu.edu>
Date: Wed, 21 Jun 2023 14:07:16 +0800
Subject: [PATCH 1/3] BUG: bad display for complex series with nan

---
 pandas/io/formats/format.py              | 36 ++++++++++++------------
 pandas/tests/io/formats/test_printing.py | 22 +++++++++++++++
 2 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index a425944647b5c..e8e72ad3be456 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1506,14 +1506,16 @@ def format_values_with(float_format):
 
             # default formatter leaves a space to the left when formatting
             # floats, must be consistent for left-justifying NaNs (GH #25061)
-            if self.justify == "left":
-                na_rep = " " + self.na_rep
-            else:
-                na_rep = self.na_rep
+            na_rep = " " + self.na_rep if self.justify == "left" else self.na_rep
 
-            # separate the wheat from the chaff
+            # different formatting strategies for complex and non-complex data
+            # need to distinguish complex and float NaNs (GH #53762)
             values = self.values
             is_complex = is_complex_dtype(values)
+            if is_complex:
+                na_rep = f"{na_rep}+{0:.{self.digits}f}j"
+
+            # separate the wheat from the chaff
             values = format_with_na_rep(values, formatter, na_rep)
 
             if self.fixed_width:
@@ -1912,22 +1914,20 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s
     Separates the real and imaginary parts from the complex number, and
     executes the _trim_zeros_float method on each of those.
     """
-    trimmed = [
-        "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal))
-        for x in str_complexes
-    ]
+    real_part, imag_part = [], []
+    for x in str_complexes:
+        trimmed = re.split(r"([j+-])", x)
+        real_part.append("".join(trimmed[:-4]))
+        imag_part.append("".join(trimmed[-4:-2]))
 
     # pad strings to the length of the longest trimmed string for alignment
-    lengths = [len(s) for s in trimmed]
-    max_length = max(lengths)
+    n = len(str_complexes)
+    padded_parts = _trim_zeros_float(real_part + imag_part, decimal)
     padded = [
-        s[: -((k - 1) // 2 + 1)]  # real part
-        + (max_length - k) // 2 * "0"
-        + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)]  # + / -
-        + s[-((k - 1) // 2) : -1]  # imaginary part
-        + (max_length - k) // 2 * "0"
-        + s[-1]
-        for s, k in zip(trimmed, lengths)
+        padded_parts[i]  # real part (including - or space, possibly "NaN")
+        + padded_parts[i + n]  # imaginary part (including + or -)
+        + "j"
+        for i in range(n)
     ]
     return padded
 
diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py
index 6f578b45bf71d..56134828b2000 100644
--- a/pandas/tests/io/formats/test_printing.py
+++ b/pandas/tests/io/formats/test_printing.py
@@ -1,6 +1,7 @@
 import string
 
 import numpy as np
+import pytest
 
 import pandas._config.config as cf
 
@@ -207,3 +208,24 @@ def test_multiindex_long_element():
         "cccccccccccccccccccccc',)],\n           )"
     )
     assert str(data) == expected
+
+
+@pytest.mark.parametrize(
+    "data,output",
+    [
+        ([2, complex("nan"), 1], ["2.0+0.0j", "NaN+0.0j", "1.0+0.0j"]),
+        ([1.23j, complex("nan"), 1.23], ["0.00+1.23j", " NaN+0.00j", "1.23+0.00j"]),
+    ],
+)
+@pytest.mark.parametrize("as_frame", [True, False])
+def test_ser_df_with_complex_nans(data, output, as_frame):
+    # GH#53762
+    obj = pd.Series(data)
+    if as_frame:
+        obj = obj.to_frame(name="val")
+        reprs = [f"{i}  {val}" for i, val in enumerate(output)]
+        expected = f"{'val': >{len(reprs[0])}}\n" + "\n".join(reprs)
+    else:
+        reprs = [f"{i}    {val}" for i, val in enumerate(output)]
+        expected = "\n".join(reprs) + "\ndtype: complex128"
+    assert str(obj) == expected

From fecdc069f509142a3bd400ca4e5eecb964c7086a Mon Sep 17 00:00:00 2001
From: Charlie-XIAO <yx2436@nyu.edu>
Date: Thu, 22 Jun 2023 00:15:17 +0800
Subject: [PATCH 2/3] added comments

---
 pandas/io/formats/format.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index e8e72ad3be456..6cc00ffc8889c 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1916,11 +1916,17 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s
     """
     real_part, imag_part = [], []
     for x in str_complexes:
+        # Complex numbers are represented as "(-)xxx(+/-)xxxj"
+        # The split will give [maybe "-", "xxx", "+/-", "xxx", "j", ""]
+        # Therefore, the imaginary part is the 4th and 3rd last elements,
+        # and the real part is everything before the imaginary part
         trimmed = re.split(r"([j+-])", x)
         real_part.append("".join(trimmed[:-4]))
         imag_part.append("".join(trimmed[-4:-2]))
 
-    # pad strings to the length of the longest trimmed string for alignment
+    # We want to align the lengths of the real and imaginary parts of each complex
+    # number, as well as the lengths of the real (resp. imaginary) parts of all
+    # numbers in the array
     n = len(str_complexes)
     padded_parts = _trim_zeros_float(real_part + imag_part, decimal)
     padded = [

From 0228ac5c72da8bc06a93e0c2c7f05eaecf83fab6 Mon Sep 17 00:00:00 2001
From: Charlie-XIAO <yx2436@nyu.edu>
Date: Thu, 22 Jun 2023 00:36:51 +0800
Subject: [PATCH 3/3] added more test cases

---
 pandas/tests/io/formats/test_printing.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py
index 56134828b2000..dc106c9bebd45 100644
--- a/pandas/tests/io/formats/test_printing.py
+++ b/pandas/tests/io/formats/test_printing.py
@@ -213,8 +213,11 @@ def test_multiindex_long_element():
 @pytest.mark.parametrize(
     "data,output",
     [
-        ([2, complex("nan"), 1], ["2.0+0.0j", "NaN+0.0j", "1.0+0.0j"]),
-        ([1.23j, complex("nan"), 1.23], ["0.00+1.23j", " NaN+0.00j", "1.23+0.00j"]),
+        ([2, complex("nan"), 1], [" 2.0+0.0j", " NaN+0.0j", " 1.0+0.0j"]),
+        ([2, complex("nan"), -1], [" 2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]),
+        ([-2, complex("nan"), -1], ["-2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]),
+        ([-1.23j, complex("nan"), -1], ["-0.00-1.23j", "  NaN+0.00j", "-1.00+0.00j"]),
+        ([1.23j, complex("nan"), 1.23], [" 0.00+1.23j", "  NaN+0.00j", " 1.23+0.00j"]),
     ],
 )
 @pytest.mark.parametrize("as_frame", [True, False])
@@ -223,9 +226,9 @@ def test_ser_df_with_complex_nans(data, output, as_frame):
     obj = pd.Series(data)
     if as_frame:
         obj = obj.to_frame(name="val")
-        reprs = [f"{i}  {val}" for i, val in enumerate(output)]
+        reprs = [f"{i} {val}" for i, val in enumerate(output)]
         expected = f"{'val': >{len(reprs[0])}}\n" + "\n".join(reprs)
     else:
-        reprs = [f"{i}    {val}" for i, val in enumerate(output)]
+        reprs = [f"{i}   {val}" for i, val in enumerate(output)]
         expected = "\n".join(reprs) + "\ndtype: complex128"
     assert str(obj) == expected