From dd93ad3f18201a709fbf2cc1c528b0487dfa8b9e Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Wed, 21 Jun 2023 14:07:16 +0800 Subject: [PATCH 1/3] BUG: bad display for complex series with nan --- pandas/io/formats/format.py | 36 ++++++++++++------------ pandas/tests/io/formats/test_printing.py | 22 +++++++++++++++ 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a425944647b5c..e8e72ad3be456 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1506,14 +1506,16 @@ def format_values_with(float_format): # default formatter leaves a space to the left when formatting # floats, must be consistent for left-justifying NaNs (GH #25061) - if self.justify == "left": - na_rep = " " + self.na_rep - else: - na_rep = self.na_rep + na_rep = " " + self.na_rep if self.justify == "left" else self.na_rep - # separate the wheat from the chaff + # different formatting strategies for complex and non-complex data + # need to distinguish complex and float NaNs (GH #53762) values = self.values is_complex = is_complex_dtype(values) + if is_complex: + na_rep = f"{na_rep}+{0:.{self.digits}f}j" + + # separate the wheat from the chaff values = format_with_na_rep(values, formatter, na_rep) if self.fixed_width: @@ -1912,22 +1914,20 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s Separates the real and imaginary parts from the complex number, and executes the _trim_zeros_float method on each of those. 
""" - trimmed = [ - "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal)) - for x in str_complexes - ] + real_part, imag_part = [], [] + for x in str_complexes: + trimmed = re.split(r"([j+-])", x) + real_part.append("".join(trimmed[:-4])) + imag_part.append("".join(trimmed[-4:-2])) # pad strings to the length of the longest trimmed string for alignment - lengths = [len(s) for s in trimmed] - max_length = max(lengths) + n = len(str_complexes) + padded_parts = _trim_zeros_float(real_part + imag_part, decimal) padded = [ - s[: -((k - 1) // 2 + 1)] # real part - + (max_length - k) // 2 * "0" - + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / - - + s[-((k - 1) // 2) : -1] # imaginary part - + (max_length - k) // 2 * "0" - + s[-1] - for s, k in zip(trimmed, lengths) + padded_parts[i] # real part (including - or space, possibly "NaN") + + padded_parts[i + n] # imaginary part (including + or -) + + "j" + for i in range(n) ] return padded diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 6f578b45bf71d..56134828b2000 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -1,6 +1,7 @@ import string import numpy as np +import pytest import pandas._config.config as cf @@ -207,3 +208,24 @@ def test_multiindex_long_element(): "cccccccccccccccccccccc',)],\n )" ) assert str(data) == expected + + +@pytest.mark.parametrize( + "data,output", + [ + ([2, complex("nan"), 1], ["2.0+0.0j", "NaN+0.0j", "1.0+0.0j"]), + ([1.23j, complex("nan"), 1.23], ["0.00+1.23j", " NaN+0.00j", "1.23+0.00j"]), + ], +) +@pytest.mark.parametrize("as_frame", [True, False]) +def test_ser_df_with_complex_nans(data, output, as_frame): + # GH#53762 + obj = pd.Series(data) + if as_frame: + obj = obj.to_frame(name="val") + reprs = [f"{i} {val}" for i, val in enumerate(output)] + expected = f"{'val': >{len(reprs[0])}}\n" + "\n".join(reprs) + else: + reprs = [f"{i} {val}" for i, val in enumerate(output)] + 
expected = "\n".join(reprs) + "\ndtype: complex128" + assert str(obj) == expected From fecdc069f509142a3bd400ca4e5eecb964c7086a Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Thu, 22 Jun 2023 00:15:17 +0800 Subject: [PATCH 2/3] added comments --- pandas/io/formats/format.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index e8e72ad3be456..6cc00ffc8889c 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1916,11 +1916,17 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s """ real_part, imag_part = [], [] for x in str_complexes: + # Complex numbers are represented as "(-)xxx(+/-)xxxj" + # The split will give [maybe "-", "xxx", "+/-", "xxx", "j", ""] + # Therefore, the imaginary part is the 4th and 3rd last elements, + # and the real part is everything before the imaginary part trimmed = re.split(r"([j+-])", x) real_part.append("".join(trimmed[:-4])) imag_part.append("".join(trimmed[-4:-2])) - # pad strings to the length of the longest trimmed string for alignment + # We want to align the lengths of the real and imaginary parts of each complex + # number, as well as the lengths of the real (resp. 
imaginary) parts of all numbers + # in the array n = len(str_complexes) padded_parts = _trim_zeros_float(real_part + imag_part, decimal) padded = [ From 0228ac5c72da8bc06a93e0c2c7f05eaecf83fab6 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Thu, 22 Jun 2023 00:36:51 +0800 Subject: [PATCH 3/3] added more test cases --- pandas/tests/io/formats/test_printing.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 56134828b2000..dc106c9bebd45 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -213,8 +213,11 @@ def test_multiindex_long_element(): @pytest.mark.parametrize( "data,output", [ - ([2, complex("nan"), 1], ["2.0+0.0j", "NaN+0.0j", "1.0+0.0j"]), - ([1.23j, complex("nan"), 1.23], ["0.00+1.23j", " NaN+0.00j", "1.23+0.00j"]), + ([2, complex("nan"), 1], [" 2.0+0.0j", " NaN+0.0j", " 1.0+0.0j"]), + ([2, complex("nan"), -1], [" 2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]), + ([-2, complex("nan"), -1], ["-2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]), + ([-1.23j, complex("nan"), -1], ["-0.00-1.23j", " NaN+0.00j", "-1.00+0.00j"]), + ([1.23j, complex("nan"), 1.23], [" 0.00+1.23j", " NaN+0.00j", " 1.23+0.00j"]), ], ) @pytest.mark.parametrize("as_frame", [True, False]) @@ -223,9 +226,9 @@ def test_ser_df_with_complex_nans(data, output, as_frame): obj = pd.Series(data) if as_frame: obj = obj.to_frame(name="val") - reprs = [f"{i} {val}" for i, val in enumerate(output)] + reprs = [f"{i} {val}" for i, val in enumerate(output)] expected = f"{'val': >{len(reprs[0])}}\n" + "\n".join(reprs) else: - reprs = [f"{i} {val}" for i, val in enumerate(output)] + reprs = [f"{i} {val}" for i, val in enumerate(output)] expected = "\n".join(reprs) + "\ndtype: complex128" assert str(obj) == expected