Skip to content

Commit 0b4f27d

Browse files
authored
repr and repr_html improvements (#258)
1 parent e7e3e02 commit 0b4f27d

File tree

7 files changed

+112
-32
lines changed

7 files changed

+112
-32
lines changed

databricks/koalas/frame.py

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,13 @@ def to_koalas(self):
733733
__index_level_0__ col1 col2
734734
0 0 1 3
735735
1 1 2 4
736+
737+
Calling to_koalas on a Koalas DataFrame simply returns itself.
738+
739+
>>> df.to_koalas()
740+
col1 col2
741+
0 1 3
742+
1 2 4
736743
"""
737744
if isinstance(self, DataFrame):
738745
return self
@@ -1298,33 +1305,45 @@ def sort_values(self, by, ascending=True, inplace=False, na_position='last'):
12981305
Examples
12991306
--------
13001307
>>> df = ks.DataFrame({
1301-
... 'col1': ['A', 'A', 'B', None, 'D', 'C'],
1302-
... 'col2': [2, 1, 9, 8, 7, 4],
1303-
... 'col3': [0, 1, 9, 4, 2, 3],
1308+
... 'col1': ['A', 'B', None, 'D', 'C'],
1309+
... 'col2': [2, 9, 8, 7, 4],
1310+
... 'col3': [0, 9, 4, 2, 3],
13041311
... })
13051312
>>> df
13061313
col1 col2 col3
13071314
0 A 2 0
1308-
1 A 1 1
1309-
2 B 9 9
1310-
3 None 8 4
1311-
4 D 7 2
1312-
5 C 4 3
1315+
1 B 9 9
1316+
2 None 8 4
1317+
3 D 7 2
1318+
4 C 4 3
13131319
13141320
Sort by col1
13151321
13161322
>>> df.sort_values(by=['col1'])
13171323
col1 col2 col3
13181324
0 A 2 0
1319-
1 A 1 1
1320-
2 B 9 9
1321-
5 C 4 3
1322-
4 D 7 2
1323-
3 None 8 4
1325+
1 B 9 9
1326+
4 C 4 3
1327+
3 D 7 2
1328+
2 None 8 4
13241329
1330+
Sort Descending
1331+
1332+
>>> df.sort_values(by='col1', ascending=False)
1333+
col1 col2 col3
1334+
3 D 7 2
1335+
4 C 4 3
1336+
1 B 9 9
1337+
0 A 2 0
1338+
2 None 8 4
13251339
13261340
Sort by multiple columns
13271341
1342+
>>> df = ks.DataFrame({
1343+
... 'col1': ['A', 'A', 'B', None, 'D', 'C'],
1344+
... 'col2': [2, 1, 9, 8, 7, 4],
1345+
... 'col3': [0, 1, 9, 4, 2, 3],
1346+
... })
13281347
>>> df.sort_values(by=['col1', 'col2'])
13291348
col1 col2 col3
13301349
1 A 1 1
@@ -1333,17 +1352,6 @@ def sort_values(self, by, ascending=True, inplace=False, na_position='last'):
13331352
5 C 4 3
13341353
4 D 7 2
13351354
3 None 8 4
1336-
1337-
Sort Descending
1338-
1339-
>>> df.sort_values(by='col1', ascending=False)
1340-
col1 col2 col3
1341-
4 D 7 2
1342-
5 C 4 3
1343-
2 B 9 9
1344-
0 A 2 0
1345-
1 A 1 1
1346-
3 None 8 4
13471355
"""
13481356
if isinstance(by, string_types):
13491357
by = [by]
@@ -1496,7 +1504,10 @@ def _pd_getitem(self, key):
14961504
raise NotImplementedError(key)
14971505

14981506
def __repr__(self):
    """Return the pandas ``repr`` of the first ``max_display_count`` rows."""
    # Only the head is converted to pandas, so the whole frame is never collected
    # just for display.
    preview = self.head(max_display_count)
    return repr(preview.to_pandas())
1508+
1509+
def _repr_html_(self):
    """Return the pandas HTML rendering of the first ``max_display_count`` rows."""
    # Same truncation as ``__repr__``: convert only the head to pandas.
    preview = self.head(max_display_count)
    return preview.to_pandas()._repr_html_()
15001511

15011512
def __getitem__(self, key):
15021513
return self._pd_getitem(key)
@@ -1541,9 +1552,6 @@ def __dir__(self):
15411552
fields = [f for f in self._sdf.schema.fieldNames() if ' ' not in f]
15421553
return super(DataFrame, self).__dir__() + fields
15431554

1544-
def _repr_html_(self):
1545-
return self.head(max_display_count).toPandas()._repr_html_()
1546-
15471555
@classmethod
15481556
def _validate_axis(cls, axis=0):
15491557
if axis not in (0, 1, 'index', 'columns', None):

databricks/koalas/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def groupby(self, by):
164164
'got [%s]' % (df_or_s,))
165165

166166
def compute(self):
    """Alias of `to_pandas()` to mimic dask for easily porting tests."""
    # Call the snake_case method named in the docstring rather than the legacy
    # `toPandas` spelling, matching the other call sites updated in this change.
    return self.to_pandas()
169169

170170
@staticmethod

databricks/koalas/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -731,7 +731,7 @@ def __str__(self):
731731
return self._pandas_orig_repr()
732732

733733
def __repr__(self):
    """Return the pandas ``repr`` of the first ``max_display_count`` elements."""
    # Only the head is converted to pandas for display.
    preview = self.head(max_display_count)
    return repr(preview.to_pandas())
735735

736736
def __dir__(self):
737737
if not isinstance(self.schema, StructType):

databricks/koalas/tests/test_dataframe.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import pandas as pd
2121

2222
from databricks import koalas
23+
from databricks.koalas.generic import max_display_count
2324
from databricks.koalas.testing.utils import ReusedSQLTestCase, SQLTestUtils
2425
from databricks.koalas.exceptions import PandasNotImplementedError
2526
from databricks.koalas.missing.frame import _MissingPandasLikeDataFrame
@@ -68,8 +69,31 @@ def test_dataframe(self):
6869

6970
self.assertEqual(ddf.a.notnull().alias("x").name, "x")
7071

71-
def test_empty_dataframe(self):
72+
def test_repr(self):
    # Make sure we only fetch max_display_count rows: a frame with one extra row
    # must render exactly like a frame with max_display_count rows. The size is
    # derived from max_display_count so the test stays valid if the limit changes.
    self.assertEqual(repr(koalas.range(max_display_count + 1)),
                     repr(koalas.range(max_display_count)))
76+
77+
def test_repr_cache_invalidation(self):
78+
# If there is any cache, inplace operations should invalidate it.
79+
df = koalas.range(10)
80+
df.__repr__()
81+
df['a'] = df['id']
82+
self.assertEqual(df.__repr__(), df.to_pandas().__repr__())
83+
84+
def test_repr_html(self):
85+
# Make sure we only fetch max_display_count
86+
self.assertEqual(koalas.range(1001)._repr_html_(),
87+
koalas.range(max_display_count)._repr_html_())
88+
89+
def test_repr_html_cache_invalidation(self):
90+
# If there is any cache, inplace operations should invalidate it.
91+
df = koalas.range(10)
92+
df._repr_html_()
93+
df['a'] = df['id']
94+
self.assertEqual(df._repr_html_(), df.to_pandas()._repr_html_())
7295

96+
def test_empty_dataframe(self):
7397
pdf = pd.DataFrame({'a': pd.Series([], dtype='i1'),
7498
'b': pd.Series([], dtype='str')})
7599

databricks/koalas/tests/test_series.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from databricks import koalas
2323
from databricks.koalas import Series
24+
from databricks.koalas.generic import max_display_count
2425
from databricks.koalas.testing.utils import ReusedSQLTestCase, SQLTestUtils
2526
from databricks.koalas.exceptions import PandasNotImplementedError
2627
from databricks.koalas.missing.series import _MissingPandasLikeSeries
@@ -43,6 +44,18 @@ def test_series(self):
4344

4445
# TODO: self.assert_eq(d + 1, pdf + 1)
4546

47+
def test_repr(self):
    # Make sure we only fetch max_display_count elements: a series with one extra
    # element must render exactly like a series with max_display_count elements.
    # The size is derived from max_display_count so the test stays valid if the
    # limit changes.
    self.assertEqual(repr(koalas.range(max_display_count + 1)['id']),
                     repr(koalas.range(max_display_count)['id']))
51+
52+
def test_repr_cache_invalidation(self):
    # If there is any cache, inplace operations should invalidate it.
    s = koalas.range(10)['id']
    repr(s)  # render once so any cached output would be populated
    s.rename('a', inplace=True)
    actual = repr(s)
    expected = repr(s.rename("a"))
    self.assertEqual(actual, expected)
58+
4659
def test_empty_series(self):
4760
a = pd.Series([], dtype='i1')
4861
b = pd.Series([], dtype='str')

databricks/koalas/tests/test_utils.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
import pandas as pd
1818

1919
from databricks.koalas.testing.utils import ReusedSQLTestCase, SQLTestUtils
20-
from databricks.koalas.utils import validate_arguments_and_invoke_function
20+
from databricks.koalas.utils import lazy_property, validate_arguments_and_invoke_function
21+
22+
some_global_variable = 0
2123

2224

2325
class UtilsTest(ReusedSQLTestCase, SQLTestUtils):
@@ -43,3 +45,20 @@ def test_validate_arguments_and_invoke_function(self):
4345
# to a non-default value
4446
with self.assertRaises(TypeError):
4547
self.to_html(unsupported_param=1)
48+
49+
def test_lazy_property(self):
    obj = TestClassForLazyProp()
    # Read the property twice. If lazy evaluation were broken, the body would run
    # again on the second read and the value would be 2 instead of 1.
    for _ in range(2):
        self.assert_eq(obj.lazy_prop, 1)
54+
55+
56+
class TestClassForLazyProp:
    """Helper for ``test_lazy_property``: counts how often ``lazy_prop`` is computed."""

    # Not a test case. Without this, the ``Test`` prefix makes pytest try to collect
    # the class and warn because it defines ``__init__``.
    __test__ = False

    def __init__(self):
        self.some_variable = 0

    @lazy_property
    def lazy_prop(self):
        # Every real evaluation bumps the counter, so a returned value above 1
        # means the property body ran more than once.
        self.some_variable += 1
        return self.some_variable

databricks/koalas/utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,19 @@ def validate_arguments_and_invoke_function(pobj: Union[pd.DataFrame, pd.Series],
7373

7474
args['self'] = pobj
7575
return pandas_func(**args)
76+
77+
78+
def lazy_property(fn):
    """
    Decorator that makes a property lazy-evaluated.

    The decorated function runs on the first access for a given instance. Its
    result is stored on that instance under ``_lazy_<function name>`` and returned
    as-is on every later access. The docstring of ``fn`` is carried over to the
    resulting property.

    Copied from https://stevenloria.com/lazy-properties/
    """
    attr_name = '_lazy_' + fn.__name__

    def _lazy_property(self):
        # Compute once per instance; later reads reuse the stored attribute.
        if not hasattr(self, attr_name):
            setattr(self, attr_name, fn(self))
        return getattr(self, attr_name)

    # The inner getter has no docstring of its own, so pass the original one
    # explicitly; otherwise help()/introspection on the property shows nothing.
    return property(_lazy_property, doc=fn.__doc__)

0 commit comments

Comments
 (0)