databricks · HyukjinKwon · Oct 24, 2019 · Oct 15, 2019 · Oct 15, 2019 · Oct 22, 2019
diff --git a/databricks/koalas/missing/series.py b/databricks/koalas/missing/series.py
@@ -105,7 +105,6 @@ class _MissingPandasLikeSeries(object):
     to_period = unsupported_function('to_period')
     to_sql = unsupported_function('to_sql')
     to_timestamp = unsupported_function('to_timestamp')
-    truncate = unsupported_function('truncate')
     tshift = unsupported_function('tshift')
     tz_convert = unsupported_function('tz_convert')
     tz_localize = unsupported_function('tz_localize')

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
@@ -3224,6 +3224,100 @@ def copy(self) -> 'Series':
         """
         return _col(DataFrame(self._internal.copy()))
 
+    def truncate(self, before=None, after=None, copy=True):
+        """
+        Truncate a Series before and/or after some particular index value.
+        The index must be sorted, either ascending or descending.
+
+        .. note:: the current implementation of truncate uses is_monotonic_increasing internally.
+            This moves all data into a single partition on a single machine and could cause
+            serious performance degradation. Avoid this method against very large datasets.
+
+        Parameters
+        ----------
+        before : string, int, optional
+            Truncate all rows before this index value.
+        after : string, int, optional
+            Truncate all rows after this index value.
+        copy : boolean, default True
+            Return a copy of the truncated section.
+
+        Returns
+        -------
+        truncated : Series
+
+        Raises
+        ------
+        ValueError
+            If the index is not sorted, or if `before` is greater than `after`.
+
+        Examples
+        --------
+        A Series whose index is sorted integers.
+
+        >>> s = ks.Series([10, 20, 30, 40, 50, 60, 70],
+        ...               index=[1, 2, 3, 4, 5, 6, 7])
+        >>> s
+        1    10
+        2    20
+        3    30
+        4    40
+        5    50
+        6    60
+        7    70
+        Name: 0, dtype: int64
+
+        >>> s.truncate(2, 5)
+        2    20
+        3    30
+        4    40
+        5    50
+        Name: 0, dtype: int64
+
+        A Series whose index is sorted strings.
+
+        >>> s = ks.Series([10, 20, 30, 40, 50, 60, 70],
+        ...               index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
+        >>> s
+        a    10
+        b    20
+        c    30
+        d    40
+        e    50
+        f    60
+        g    70
+        Name: 0, dtype: int64
+
+        >>> s.truncate('b', 'e')
+        b    20
+        c    30
+        d    40
+        e    50
+        Name: 0, dtype: int64
+        """
+        indexes = self.index
+        indexes_increasing = indexes.is_monotonic_increasing
+        if not indexes_increasing and not indexes.is_monotonic_decreasing:
+            raise ValueError("truncate requires a sorted index")
+        if (before is None) and (after is None):
+            return self.copy() if copy else self
+
+        # An omitted bound defaults to the min/max index value, whatever the sort direction.
+        sdf = indexes._internal.sdf
+        idx_col_name = self._internal.index_columns[0]
+        idx_scol = self._internal.scol_for(idx_col_name)
+        if before is None:
+            before = sdf.orderBy(idx_scol.asc()).first()[idx_col_name]
+        if after is None:
+            after = sdf.orderBy(idx_scol.desc()).first()[idx_col_name]
+        if before > after:
+            raise ValueError("Truncate: %s must be after %s" % (after, before))
+        if indexes_increasing:
+            result = _col(self.to_frame()[before:after])
+        else:
+            result = _col(self.to_frame()[after:before])
+        return result.copy() if copy else result
+
     def keys(self):
         """
         Return alias for index.

diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py
@@ -729,6 +729,30 @@ def test_duplicates(self):
         self.assert_eq(pser.drop_duplicates().sort_values(),
                        kser.drop_duplicates().sort_values())
 
+    def test_truncate(self):
+        values = [10, 20, 30, 40, 50, 60, 70]
+        pser_asc = pd.Series(values, index=[1, 2, 3, 4, 5, 6, 7])
+        pser_desc = pd.Series(values, index=[7, 6, 5, 4, 3, 2, 1])
+        kser_asc, kser_desc = ks.Series(pser_asc), ks.Series(pser_desc)
+
+        # Every call must agree with pandas for the same arguments.
+        for args, kwargs in [((), {}), ((), {'before': 2}), ((), {'after': 5}),
+                             ((), {'copy': False}), ((2, 5), {'copy': False})]:
+            self.assert_eq(kser_asc.truncate(*args, **kwargs),
+                           pser_asc.truncate(*args, **kwargs))
+        for kwargs in [{}, {'copy': False}]:
+            self.assert_eq(kser_desc.truncate(4, 6, **kwargs),
+                           pser_desc.truncate(4, 6, **kwargs))
+
+        # An unsorted index and reversed bounds are each rejected with a ValueError.
+        kser_unsorted = ks.Series(values, index=[1, 2, 3, 4, 3, 2, 1])
+        with self.assertRaisesRegex(ValueError, "truncate requires a sorted index"):
+            kser_unsorted.truncate()
+
+        kser_sorted = ks.Series(values, index=[1, 2, 3, 4, 5, 6, 7])
+        with self.assertRaisesRegex(ValueError, "Truncate: 2 must be after 5"):
+            kser_sorted.truncate(5, 2)
+
     def test_getitem(self):
         pser = pd.Series([10, 20, 15, 30, 45], ['A', 'A', 'B', 'C', 'D'])
         kser = ks.Series(pser)

diff --git a/docs/source/reference/series.rst b/docs/source/reference/series.rst
@@ -149,6 +149,7 @@ Reindexing / Selection / Label manipulation
    Series.rename
    Series.reset_index
    Series.sample
+   Series.truncate
 
 Missing data handling
 ---------------------