8
8
9
9
from config import (
10
10
SCOPES ,
11
- CSV_MIME_TYPE
11
+ CSV_MIME_TYPE ,
12
+ XLSX_MIME_TYPE
12
13
)
13
14
15
+
16
def mime_type_to_dtype(s):
    """Translate an attachment MIME type into a short data-type tag.

    Returns 'csv' when `s` equals CSV_MIME_TYPE and 'xlsx' when it equals
    XLSX_MIME_TYPE. Any other value raises AssertionError.
    """
    # Pairs are checked in order, so the CSV comparison still runs first.
    accepted = (
        (CSV_MIME_TYPE, 'csv'),
        (XLSX_MIME_TYPE, 'xlsx'),
    )
    for mime_type, dtype in accepted:
        if s == mime_type:
            return dtype
    raise AssertionError("mime type not accepted")
22
+
23
+
14
24
def get_gmail_service (credentials_path , token_path ):
15
25
store = file .Storage (token_path )
16
26
creds = store .get ()
@@ -52,9 +62,12 @@ def _get_attachment_from_part(service, messageId, part):
52
62
return _get_attachment_data (service , messageId , attachmentId )
53
63
54
64
55
def _convert_attachment_data_to_dataframe(data, data_type='csv'):
    """Decode a base64url-encoded attachment body into a pandas DataFrame.

    Args:
        data: the attachment payload as a URL-safe base64 text string.
        data_type: 'csv' or 'xlsx'; selects which pandas reader parses the
            decoded bytes. Defaults to 'csv', so single-argument calls are
            parsed as CSV.

    Returns:
        The DataFrame produced by `pd.read_csv` or `pd.read_excel`.

    Raises:
        ValueError: if `data_type` is neither 'csv' nor 'xlsx'.
    """
    # Imported here so this block does not rely on a module-level import.
    from io import BytesIO

    if data_type == 'csv':
        reader = pd.read_csv
    elif data_type == 'xlsx':
        reader = pd.read_excel
    else:
        # Without this branch an unknown type reached the return statement
        # with no DataFrame bound, surfacing as an UnboundLocalError.
        raise ValueError("data type not accepted: %r" % (data_type,))

    # urlsafe_b64decode returns bytes and an xlsx workbook is binary, so the
    # readers are given a bytes buffer instead of a text StringIO.
    decoded = base64.urlsafe_b64decode(data.encode('UTF-8'))
    return reader(BytesIO(decoded))
59
72
60
73
@@ -68,7 +81,7 @@ def _flatten_nested_email_parts(parts):
68
81
return all_parts
69
82
70
83
71
- def get_csv_attachments_from_msg_id (service , messageId ):
84
+ def get_csv_or_xl_attachments_from_msg_id (service , messageId ):
72
85
"""returns a dict of all CSV attachments as pd.DataFrames
73
86
in the email associated with `messageId`. The keys for the
74
87
dictionary are the csv filenames"""
@@ -79,18 +92,21 @@ def get_csv_attachments_from_msg_id(service, messageId):
79
92
if not msg_parts :
80
93
return []
81
94
msg_parts = _flatten_nested_email_parts (msg_parts )
82
- att_parts = [p for p in msg_parts if p ['mimeType' ]== CSV_MIME_TYPE ]
95
+ att_parts = [p for p in msg_parts if p ['mimeType' ] in [
96
+ CSV_MIME_TYPE , XLSX_MIME_TYPE ]]
97
+ types = [mime_type_to_dtype (p ['mimeType' ]) for p in att_parts ]
83
98
filenames = [p ['filename' ] for p in att_parts ]
84
99
datas = [_get_attachment_from_part (service , messageId , p ) for p in att_parts ]
85
- dfs = [_convert_attachment_data_to_dataframe (d ) for d in datas ]
100
+ dfs = [_convert_attachment_data_to_dataframe (d , t )
101
+ for d , t in zip (datas , types )]
86
102
return [{'emailsubject' : subject , 'filename' : f , 'data' : d }
87
103
for f , d in zip (filenames , dfs )]
88
104
89
105
90
def query_for_csv_or_xl_attachments(service, search_query):
    """Collect the attachments of every message matching `search_query`.

    Looks up the matching message ids with `query_for_message_ids`, fetches
    each message's attachments with `get_csv_or_xl_attachments_from_msg_id`,
    and returns all of them in a single flat list, in message order.
    """
    matching_ids = query_for_message_ids(service, search_query)
    return [
        attachment
        for message_id in matching_ids
        for attachment in get_csv_or_xl_attachments_from_msg_id(
            service, message_id)
    ]
0 commit comments