|
| 1 | +from typing import List |
| 2 | + |
1 | 3 | import pandas as pd
|
2 | 4 |
|
| 5 | +from pandas._typing import FilePathOrBuffer, Scalar |
| 6 | + |
3 | 7 | from pandas.compat._optional import import_optional_dependency
|
4 | 8 |
|
5 |
| -from pandas.io.parsers import TextParser |
| 9 | +from pandas.io.excel._base import _BaseExcelReader |
6 | 10 |
|
7 | 11 |
|
8 |
| -class _ODFReader: |
| 12 | +class _ODFReader(_BaseExcelReader): |
9 | 13 | """Read tables out of OpenDocument formatted files
|
10 | 14 |
|
11 | 15 | Parameters
|
12 | 16 | ----------
|
13 | 17 | filepath_or_buffer: string, path to be parsed or
|
14 | 18 | an open readable stream.
|
15 | 19 | """
|
16 |
| - def __init__(self, filepath_or_buffer): |
def __init__(self, filepath_or_buffer: FilePathOrBuffer):
    """Set up a reader for an OpenDocument file.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        Path to be parsed, or an open readable stream.
    """
    # "odf" is an optional dependency: check for it before handing the
    # input to the base reader's initialiser.
    import_optional_dependency("odf")

    super().__init__(filepath_or_buffer)
|
21 | 23 |
|
@property
def _workbook_class(self):
    """Return the ``odf.opendocument.OpenDocument`` class.

    The import is done inside the property so that ``odf`` is only
    required when this reader is actually used.
    """
    from odf.opendocument import OpenDocument as workbook_cls

    return workbook_cls
| 28 | + |
def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
    """Load ``filepath_or_buffer`` with ``odf.opendocument.load``.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        Path to be parsed, or an open readable stream.

    Returns
    -------
    The object returned by ``odf.opendocument.load``.
    """
    # Imported locally because "odf" is an optional dependency.
    from odf.opendocument import load as load_document

    return load_document(filepath_or_buffer)
| 32 | + |
@property
def sheet_names(self) -> List[str]:
    """Return a list of sheet names present in the document.

    The names are read from the ``(TABLENS, 'name')`` attribute of every
    ``Table`` element found in ``self.book``, in the order the elements
    are returned by ``getElementsByType``.
    """
    from odf.namespaces import TABLENS
    from odf.table import Table

    # Attribute key under which each table element stores its name.
    name_key = (TABLENS, 'name')
    return [
        table.attributes[name_key]
        for table in self.book.getElementsByType(Table)
    ]
30 | 41 |
|
31 |
| - def get_sheet_by_name(self, name): |
32 |
| - i = self.sheet_names.index(name) |
33 |
| - return self.tables[i] |
def get_sheet_by_index(self, index: int):
    """Return the ``Table`` element at position ``index`` in the document.

    Parameters
    ----------
    index : int
        Position of the wanted table among the ``Table`` elements that
        ``self.book.getElementsByType`` returns.
    """
    from odf.table import Table

    return self.book.getElementsByType(Table)[index]
34 | 46 |
|
35 |
| - def _get_sheet(self, name): |
36 |
| - """Given a sheet name or index, return the root ODF Table node |
37 |
| - """ |
38 |
| - if isinstance(name, str): |
39 |
| - return self.get_sheet_by_name(name) |
40 |
| - elif isinstance(name, int): |
41 |
| - return self.get_sheet_by_index(name) |
42 |
| - else: |
43 |
| - raise ValueError( |
44 |
| - 'Unrecognized sheet identifier type {}. Please use' |
45 |
| - 'a string or integer'.format(type(name))) |
def get_sheet_by_name(self, name: str):
    """Return the first ``Table`` element whose name equals ``name``.

    Parameters
    ----------
    name : str
        Sheet name, compared against the ``(TABLENS, "name")`` attribute
        of each ``Table`` element in ``self.book``.

    Returns
    -------
    The matching ``Table`` element.

    Raises
    ------
    ValueError
        If no table in the document carries the requested name.
    """
    from odf.namespaces import TABLENS
    from odf.table import Table

    tables = self.book.getElementsByType(Table)

    key = (TABLENS, "name")
    for table in tables:
        if table.attributes[key] == name:
            return table

    # The placeholder is a *named* field, so the value must be passed by
    # keyword; a positional argument would make ``format`` itself raise
    # KeyError and hide the intended ValueError.
    raise ValueError("sheet {name} not found".format(name=name))
52 | 59 |
|
53 | 60 | def get_sheet_data(self, sheet, convert_float):
|
54 | 61 | """Parse an ODF Table into a list of lists
|
@@ -97,7 +104,6 @@ def get_sheet_data(self, sheet, convert_float):
|
97 | 104 |
|
98 | 105 | def _get_row_repeat(self, row):
|
99 | 106 | """Return number of times this row was repeated
|
100 |
| -
|
101 | 107 | Repeating an empty row appeared to be a common way
|
102 | 108 | of representing sparse rows in the table.
|
103 | 109 | """
|
|
0 commit comments