Skip to content

Commit 8a9a66c

Browse files
committed
Updated inheritance to use excel reader interface
1 parent 39cfecf commit 8a9a66c

File tree

2 files changed

+35
-55
lines changed

2 files changed

+35
-55
lines changed

pandas/io/excel/_odfreader.py

+35-29
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,61 @@
1+
from typing import List
2+
13
import pandas as pd
24

5+
from pandas._typing import FilePathOrBuffer, Scalar
6+
37
from pandas.compat._optional import import_optional_dependency
48

5-
from pandas.io.parsers import TextParser
9+
from pandas.io.excel._base import _BaseExcelReader
610

711

8-
class _ODFReader:
12+
class _ODFReader(_BaseExcelReader):
913
"""Read tables out of OpenDocument formatted files
1014
1115
Parameters
1216
----------
1317
filepath_or_buffer: string, path to be parsed or
1418
an open readable stream.
1519
"""
16-
def __init__(self, filepath_or_buffer):
20+
def __init__(self, filepath_or_buffer: FilePathOrBuffer):
1721
import_optional_dependency("odf")
18-
self.document = document_load(filepath_or_buffer)
19-
self.tables = self.document.getElementsByType(Table)
2022
super().__init__(filepath_or_buffer)
2123

2224
@property
23-
def sheet_names(self):
25+
def _workbook_class(self):
26+
from odf.opendocument import OpenDocument
27+
return OpenDocument
28+
29+
def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
30+
from odf.opendocument import load
31+
return load(filepath_or_buffer)
32+
33+
@property
34+
def sheet_names(self) -> List[str]:
2435
"""Return a list of sheet names present in the document"""
2536
from odf.namespaces import TABLENS
26-
return [t.attributes[(TABLENS, 'name')] for t in self.tables]
37+
from odf.table import Table
2738

28-
def get_sheet_by_index(self, index):
29-
return self.tables[index]
39+
tables = self.book.getElementsByType(Table)
40+
return [t.attributes[(TABLENS, 'name')] for t in tables]
3041

31-
def get_sheet_by_name(self, name):
32-
i = self.sheet_names.index(name)
33-
return self.tables[i]
42+
def get_sheet_by_index(self, index: int):
43+
from odf.table import Table
44+
tables = self.book.getElementsByType(Table)
45+
return tables[index]
3446

35-
def _get_sheet(self, name):
36-
"""Given a sheet name or index, return the root ODF Table node
37-
"""
38-
if isinstance(name, str):
39-
return self.get_sheet_by_name(name)
40-
elif isinstance(name, int):
41-
return self.get_sheet_by_index(name)
42-
else:
43-
raise ValueError(
44-
'Unrecognized sheet identifier type {}. Please use'
45-
'a string or integer'.format(type(name)))
47+
def get_sheet_by_name(self, name: str):
48+
from odf.namespaces import TABLENS
49+
from odf.table import Table
50+
51+
tables = self.book.getElementsByType(Table)
4652

47-
def parse(self, sheet_name=0, **kwds):
48-
tree = self._get_sheet(sheet_name)
49-
data = self.get_sheet_data(tree, convert_float=False)
50-
parser = TextParser(data, **kwds)
51-
return parser.read()
53+
key = (TABLENS, "name")
54+
for table in tables:
55+
if table.attributes[key] == name:
56+
return table
57+
58+
raise ValueError("sheet {name} not found".format(name=name))
5259

5360
def get_sheet_data(self, sheet, convert_float):
5461
"""Parse an ODF Table into a list of lists
@@ -97,7 +104,6 @@ def get_sheet_data(self, sheet, convert_float):
97104

98105
def _get_row_repeat(self, row):
99106
"""Return number of times this row was repeated
100-
101107
Repeating an empty row appeared to be a common way
102108
of representing sparse rows in the table.
103109
"""

pandas/tests/io/excel/test_odf.py

-26
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,6 @@
99
pytest.importorskip("odf")
1010

1111

12-
def test_get_sheet(datapath):
13-
from pandas.io.excel._odfreader import ODFReader
14-
15-
pth = datapath("io", "data", "datatypes.ods")
16-
book = ODFReader(pth)
17-
18-
assert len(book.sheet_names) == 1
19-
assert book.sheet_names == ['Sheet1']
20-
21-
22-
def test_get_sheet_raises(datapath):
23-
from pandas.io.excel._odfreader import ODFReader
24-
25-
pth = datapath("io", "data", 'datatypes.ods')
26-
book = ODFReader(pth)
27-
28-
with pytest.raises(ValueError):
29-
book._get_sheet(3.14)
30-
31-
with pytest.raises(ValueError):
32-
book.get_sheet_by_name("Invalid Sheet 77")
33-
34-
with pytest.raises(IndexError):
35-
book.get_sheet_by_index(-33)
36-
37-
3812
def test_read_types(datapath):
3913
path = datapath("io", "data", "datatypes.ods")
4014
sheet = pd.read_excel(path, header=None, engine='odf')

0 commit comments

Comments
 (0)