Skip to content

Commit d43fce0

Browse files
FIX-#2239: Improve testing for case
Signed-off-by: Devin Petersohn <[email protected]>
1 parent 0d426b6 commit d43fce0

File tree

3 files changed

+155
-12
lines changed

3 files changed

+155
-12
lines changed

modin/engines/base/io/text/csv_reader.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -180,18 +180,6 @@ def _read(cls, filepath_or_buffer, **kwargs):
180180
if index_col is None:
181181
row_lengths = cls.materialize(index_ids)
182182
new_index = pandas.RangeIndex(sum(row_lengths))
183-
# pandas has a really weird edge case here.
184-
# The edge case is as follows:
185-
# If skiprows and names are specified, pandas assigns a row number based
186-
# on the number of dtypes that match above.
187-
# This number is not easy for us to compute and ensure matching behavior
188-
# with pandas, so we will just read 1 line with pandas and grab the start
189-
# value from that.
190-
if skiprows > 1 and kwargs.get("names", None) is not None:
191-
start = pandas.read_csv(
192-
filepath_or_buffer, skiprows=skiprows, nrows=1, names=names
193-
).index[0]
194-
new_index = pandas.RangeIndex(start, start + new_index.stop)
195183
else:
196184
index_objs = cls.materialize(index_ids)
197185
row_lengths = [len(o) for o in index_objs]

modin/pandas/test/data/issue_2239.csv

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
1585542839.000000, 1585542839.000000, 1585542839.000000
2+
32.000000, 32.000000, 32.000000
3+
-38,-14,51
4+
-38,-13,51
5+
-38,-14,51
6+
-38,-14,50
7+
-38,-13,51
8+
-38,-14,50
9+
-38,-14,51
10+
-38,-13,51
11+
-38,-14,51
12+
-38,-13,51
13+
-38,-14,51
14+
-38,-14,50
15+
-38,-13,51
16+
-38,-14,50
17+
-38,-14,51
18+
-38,-13,51
19+
-38,-14,51
20+
-38,-13,51
21+
-38,-14,51
22+
-38,-14,50
23+
-38,-13,51
24+
-38,-14,50
25+
-38,-14,51
26+
-38,-13,51
27+
-38,-14,51
28+
-38,-13,51
29+
-38,-14,51
30+
-38,-14,50
31+
-38,-13,51
32+
-38,-14,50
33+
-38,-14,51
34+
-38,-13,51
35+
-38,-14,51
36+
-38,-13,51
37+
-38,-14,51
38+
-38,-14,50
39+
-38,-13,51
40+
-38,-14,50
41+
-38,-14,51
42+
-38,-13,51
43+
-38,-14,51
44+
-38,-13,51
45+
-38,-14,51
46+
-38,-14,50
47+
-38,-13,51
48+
-38,-14,50
49+
-38,-14,51
50+
-38,-13,51
51+
-38,-14,51
52+
-38,-13,51
53+
-38,-14,51
54+
-38,-14,50
55+
-38,-13,51
56+
-38,-14,50
57+
-38,-14,51
58+
-38,-13,51
59+
-38,-14,51
60+
-38,-13,51
61+
-38,-14,51
62+
-38,-14,50
63+
-38,-13,51
64+
-38,-14,50
65+
-38,-14,51
66+
-38,-13,51
67+
-38,-14,51
68+
-38,-13,51
69+
-38,-14,51
70+
-38,-14,50
71+
-38,-13,51
72+
-38,-14,50
73+
-38,-14,51
74+
-38,-13,51
75+
-38,-14,51
76+
-38,-13,51
77+
-38,-14,51
78+
-38,-14,50
79+
-38,-13,51
80+
-38,-14,50
81+
-38,-14,51
82+
-38,-13,51
83+
-38,-14,51
84+
-38,-13,51
85+
-38,-14,51
86+
-38,-14,50
87+
-38,-13,51
88+
-38,-14,50
89+
-38,-14,51
90+
-38,-13,51
91+
-38,-14,51
92+
-38,-13,51
93+
-38,-14,51
94+
-38,-14,50
95+
-38,-13,51
96+
-38,-14,50
97+
-38,-14,51
98+
-38,-13,51
99+
-38,-14,51
100+
-38,-13,51
101+
-38,-14,51
102+
-38,-14,50
103+
-38,-13,51
104+
-38,-14,50
105+
-38,-14,51
106+
-38,-13,51
107+
-38,-14,51
108+
-38,-13,51
109+
-38,-14,51
110+
-38,-14,50
111+
-38,-13,51
112+
-38,-14,50
113+
-38,-14,51
114+
-38,-13,51
115+
-38,-14,51
116+
-38,-13,51
117+
-38,-14,51
118+
-38,-14,50
119+
-38,-13,51
120+
-38,-14,50
121+
-38,-14,51
122+
-38,-13,51
123+
-38,-14,51
124+
-38,-13,51
125+
-38,-14,51
126+
-38,-14,50
127+
-38,-13,51
128+
-38,-14,50
129+
-38,-14,51
130+
-38,-13,51
131+
-38,-14,51
132+
-38,-13,51
133+
-38,-14,51
134+
-38,-14,50
135+
-38,-13,51
136+
-38,-14,50
137+
-38,-14,51
138+
-38,-13,51
139+
-38,-14,51
140+
-38,-13,51
141+
-38,-14,51
142+
-38,-14,50
143+
-38,-13,51
144+
-38,-14,50
145+
-38,-14,51
146+
-38,-13,51

modin/pandas/test/test_io.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,15 @@ def test_from_csv_skiprows(make_csv_file, nrows):
11601160
df_equals(modin_df, pandas_df)
11611161

11621162

1163+
@pytest.mark.parametrize("names", [list("XYZ"), None])
@pytest.mark.parametrize("skiprows", [1, 2, 3, 4, None])
def test_from_csv_skiprows_names(names, skiprows):
    """Check ``read_csv`` agreement for every ``names``/``skiprows`` combination.

    The same fixture file is read once through ``pandas`` and once through
    ``pd`` with identical keyword arguments, and the two results are handed
    to ``df_equals`` for comparison.
    """
    # Both readers must receive exactly the same options.
    read_kwargs = {"names": names, "skiprows": skiprows}
    csv_path = "modin/pandas/test/data/issue_2239.csv"

    expected = pandas.read_csv(csv_path, **read_kwargs)
    actual = pd.read_csv(csv_path, **read_kwargs)

    df_equals(expected, actual)
1170+
1171+
11631172
@pytest.mark.parametrize(
11641173
"encoding", ["latin8", "ISO-8859-1", "latin1", "iso-8859-1", "cp1252", "utf8"]
11651174
)

0 commit comments

Comments
 (0)