FIX-#2239: Improve testing for case

devin-petersohn · devin-petersohn · commit d43fce0e17b4 · 2020-10-27T10:03:10.000-05:00
Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
diff --git a/modin/engines/base/io/text/csv_reader.py b/modin/engines/base/io/text/csv_reader.py
@@ -180,18 +180,6 @@ def _read(cls, filepath_or_buffer, **kwargs):
         if index_col is None:
             row_lengths = cls.materialize(index_ids)
             new_index = pandas.RangeIndex(sum(row_lengths))
-            # pandas has a really weird edge case here.
-            # The edge case is as follows:
-            # If skiprows and names are specified, pandas assigns a row number based
-            # on the number of dtypes that match above.
-            # This number is not easy for us to compute and ensure matching behavior
-            # with pandas, so we will just read 1 line with pandas and grab the start
-            # value from that.
-            if skiprows > 1 and kwargs.get("names", None) is not None:
-                start = pandas.read_csv(
-                    filepath_or_buffer, skiprows=skiprows, nrows=1, names=names
-                ).index[0]
-                new_index = pandas.RangeIndex(start, start + new_index.stop)
         else:
             index_objs = cls.materialize(index_ids)
             row_lengths = [len(o) for o in index_objs]
diff --git a/modin/pandas/test/data/issue_2239.csv b/modin/pandas/test/data/issue_2239.csv
@@ -0,0 +1,146 @@
+1585542839.000000, 1585542839.000000, 1585542839.000000
+32.000000, 32.000000, 32.000000
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-13,51
+-38,-14,51
+-38,-14,50
+-38,-13,51
+-38,-14,50
+-38,-14,51
+-38,-13,51
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
@@ -1160,6 +1160,15 @@ def test_from_csv_skiprows(make_csv_file, nrows):
     df_equals(modin_df, pandas_df)
 
 
+@pytest.mark.parametrize("names", [list("XYZ"), None])  # explicit names or none
+@pytest.mark.parametrize("skiprows", [1, 2, 3, 4, None])  # incl. no skipping
+def test_from_csv_skiprows_names(names, skiprows):  # pandas/modin read_csv parity
+    path = "modin/pandas/test/data/issue_2239.csv"  # fixture for issue 2239
+    pandas_df = pandas.read_csv(path, names=names, skiprows=skiprows)  # reference
+    modin_df = pd.read_csv(path, names=names, skiprows=skiprows)  # same arguments
+    df_equals(pandas_df, modin_df)
+
+
 @pytest.mark.parametrize(
     "encoding", ["latin8", "ISO-8859-1", "latin1", "iso-8859-1", "cp1252", "utf8"]
 )