Skip to content

Commit f4a976e

Browse files
committed
Fix.
1 parent 976d447 commit f4a976e

File tree

5 files changed

+29
-7
lines changed

5 files changed

+29
-7
lines changed

databricks/koalas/frame.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -649,8 +649,8 @@ def _reduce_for_stat_function(self, sfun, name, axis=None, numeric_only=True, mi
649 649

650 650
if min_count > 0:
651 651
scol = F.when(
652-
Frame._count_expr(spark_column, spark_type) < min_count, F.lit(np.nan)
653-
).otherwise(scol)
652+
Frame._count_expr(spark_column, spark_type) >= min_count, scol
653+
)
654 654

655 655
exprs.append(scol.alias(name_like_string(label)))
656 656
new_column_labels.append(label)

databricks/koalas/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1211,7 +1211,7 @@ def sum(spark_column, spark_type):
1211 1211
spark_column = spark_column.cast(LongType())
1212 1212
elif not isinstance(spark_type, NumericType):
1213 1213
raise TypeError("Could not convert {} to numeric".format(spark_type.simpleString()))
1214-
return F.sum(spark_column)
1214+
return F.coalesce(F.sum(spark_column), F.lit(0))
1215 1215

1216 1216
return self._reduce_for_stat_function(
1217 1217
sum, name="sum", axis=axis, numeric_only=numeric_only, min_count=min_count

databricks/koalas/series.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5859,11 +5859,10 @@ def _reduce_for_stat_function(self, sfun, name, axis=None, numeric_only=None, mi
5859 5859
scol = sfun(spark_column, spark_type)
5860 5860

5861 5861
if min_count > 0:
5862-
scol = F.when(
5863-
Frame._count_expr(spark_column, spark_type) < min_count, F.lit(np.nan)
5864-
).otherwise(scol)
5862+
scol = F.when(Frame._count_expr(spark_column, spark_type) >= min_count, scol)
5865 5863

5866-
return unpack_scalar(self._internal.spark_frame.select(scol))
5864+
result = unpack_scalar(self._internal.spark_frame.select(scol))
5865+
return result if result is not None else np.nan
5867 5866

5868 5867
def __getitem__(self, key):
5869 5868
try:

databricks/koalas/testing/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,8 @@ def assert_eq(self, left, right, check_exact=True, almost=False):
264 264
self.assertTrue(len(left) == len(right))
265 265
for litem, ritem in zip(left, right):
266 266
self.assert_eq(litem, ritem, check_exact=check_exact, almost=almost)
267+
elif (lobj is not None and pd.isna(lobj)) and (robj is not None and pd.isna(robj)):
268+
pass
267 269
else:
268 270
if almost:
269 271
self.assertAlmostEqual(lobj, robj)

databricks/koalas/tests/test_stats.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ def test_stat_functions(self):
53 53
self._test_stat_functions(pdf.A, kdf.A)
54 54
self._test_stat_functions(pdf, kdf)
55 55

56+
# empty
57+
self._test_stat_functions(pdf.A.loc[[]], kdf.A.loc[[]])
58+
self._test_stat_functions(pdf.loc[[]], kdf.loc[[]])
59+
56 60
def test_stat_functions_multiindex_column(self):
57 61
arrays = [np.array(["A", "A", "B", "B"]), np.array(["one", "two", "one", "two"])]
58 62
pdf = pd.DataFrame(np.random.randn(3, 4), index=["A", "B", "C"], columns=arrays)
@@ -70,6 +74,23 @@ def test_stat_functions_with_no_numeric_columns(self):
70 74
kdf = ks.from_pandas(pdf)
71 75
self._test_stat_functions(pdf, kdf)
72 76

77+
def test_sum(self):
78+
pdf = pd.DataFrame({"a": [1, 2, 3, np.nan], "b": [0.1, np.nan, 0.3, np.nan]})
79+
kdf = ks.from_pandas(pdf)
80+
81+
self.assert_eq(kdf.sum(), pdf.sum())
82+
self.assert_eq(kdf.sum(axis=1), pdf.sum(axis=1))
83+
self.assert_eq(kdf.sum(min_count=3), pdf.sum(min_count=3))
84+
self.assert_eq(kdf.sum(axis=1, min_count=1), pdf.sum(axis=1, min_count=1))
85+
self.assert_eq(kdf.loc[[]].sum(), pdf.loc[[]].sum())
86+
self.assert_eq(kdf.loc[[]].sum(min_count=1), pdf.loc[[]].sum(min_count=1))
87+
88+
self.assert_eq(kdf["a"].sum(), pdf["a"].sum())
89+
self.assert_eq(kdf["a"].sum(min_count=3), pdf["a"].sum(min_count=3))
90+
self.assert_eq(kdf["b"].sum(min_count=3), pdf["b"].sum(min_count=3))
91+
self.assert_eq(kdf["a"].loc[[]].sum(), pdf["a"].loc[[]].sum())
92+
self.assert_eq(kdf["a"].loc[[]].sum(min_count=1), pdf["a"].loc[[]].sum(min_count=1))
93+
73 94
def test_abs(self):
74 95
pdf = pd.DataFrame(
75 96
{

0 commit comments

Comments
 (0)