Skip to content

Commit 228ef2d

Browse files
added excel (xlsx) attachments
1 parent f926c2f commit 228ef2d

File tree

7 files changed

+47
-15
lines changed

7 files changed

+47
-15
lines changed

README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,15 @@ from google_api import gmail
3535
# get all attachments from e-mails containing 'test'
3636
search_query = "test"
3737
service = gmail.get_gmail_service(GMAIL_CREDENTIALS_PATH, GMAIL_TOKEN_PATH)
38-
csv_dfs = gmail.query_for_csv_attachments(service, search_query)
39-
print(csv_dfs)
40-
38+
results = gmail.query_for_csv_or_xl_attachments(service, search_query)
39+
40+
# 1st Attachment found:
41+
item = results[0]
42+
df = item['data']
43+
print('email: ' + item['emailsubject'])
44+
print('filename: ' + item['filename'])
45+
print("data sample: ")
46+
print(df.head())
47+
4148
```
4249

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ dependencies:
66
- pandas
77
- oauth2client
88
- google-api-python-client
9+
- xlrd

example.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,12 @@
66
search_query = "Encrave"
77
service = gmail.get_gmail_service(GMAIL_CREDENTIALS_PATH,
88
GMAIL_TOKEN_PATH)
9-
csv_dfs = gmail.query_for_csv_attachments(service, search_query)
9+
csvs_and_excel = gmail.query_for_csv_or_xl_attachments(service, search_query)
1010

11+
# 1st Attachment found:
12+
item = csvs_and_excel[0]
13+
df = item['data']
14+
print('email: ' + item['emailsubject'])
15+
print('filename: ' + item['filename'])
16+
print("data sample: ")
17+
print(df.head())

google_api/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
import gmail
22

3-
__version__ = "0.0.1"
3+
__version__ = "0.0.2"

google_api/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# If modifying these scopes, delete the file token.json.
22
SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'
33
CSV_MIME_TYPE = 'text/csv'
4+
XLSX_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
45

google_api/gmail.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,19 @@
88

99
from config import (
1010
SCOPES,
11-
CSV_MIME_TYPE
11+
CSV_MIME_TYPE,
12+
XLSX_MIME_TYPE
1213
)
1314

15+
16+
def mime_type_to_dtype(s):
    """Translate an attachment MIME type into the short data-type tag
    used when parsing: CSV_MIME_TYPE -> 'csv', XLSX_MIME_TYPE -> 'xlsx'.

    Raises AssertionError for any other value."""
    # Ordered (mime type, tag) pairs; compared with == so the lookup
    # behaves the same for any kind of input value.
    known_types = (
        (CSV_MIME_TYPE, 'csv'),
        (XLSX_MIME_TYPE, 'xlsx'),
    )
    for mime_type, dtype in known_types:
        if s == mime_type:
            return dtype
    raise AssertionError("mime type not accepted")
22+
23+
1424
def get_gmail_service(credentials_path, token_path):
1525
store = file.Storage(token_path)
1626
creds = store.get()
@@ -52,9 +62,12 @@ def _get_attachment_from_part(service, messageId, part):
5262
return _get_attachment_data(service, messageId, attachmentId)
5363

5464

55-
def _convert_attachment_data_to_dataframe(data):
56-
str_csv = base64.urlsafe_b64decode(data.encode('UTF-8'))
57-
df = pd.read_csv(StringIO(str_csv))
65+
def _convert_attachment_data_to_dataframe(data, data_type):
66+
str_decoded = base64.urlsafe_b64decode(data.encode('UTF-8'))
67+
if data_type == 'csv':
68+
df = pd.read_csv(StringIO(str_decoded))
69+
elif data_type == 'xlsx':
70+
df = pd.read_excel(StringIO(str_decoded))
5871
return df
5972

6073

@@ -68,7 +81,7 @@ def _flatten_nested_email_parts(parts):
6881
return all_parts
6982

7083

71-
def get_csv_attachments_from_msg_id(service, messageId):
84+
def get_csv_or_xl_attachments_from_msg_id(service, messageId):
7285
"""returns a list with one dict per CSV or Excel (xlsx) attachment
7386
in the email associated with `messageId`. Each dict has the keys
7487
'emailsubject', 'filename' and 'data' (the attachment as a pd.DataFrame)"""
@@ -79,18 +92,21 @@ def get_csv_attachments_from_msg_id(service, messageId):
7992
if not msg_parts:
8093
return []
8194
msg_parts = _flatten_nested_email_parts(msg_parts)
82-
att_parts = [p for p in msg_parts if p['mimeType']==CSV_MIME_TYPE]
95+
att_parts = [p for p in msg_parts if p['mimeType'] in [
96+
CSV_MIME_TYPE, XLSX_MIME_TYPE]]
97+
types = [mime_type_to_dtype(p['mimeType']) for p in att_parts]
8398
filenames = [p['filename'] for p in att_parts]
8499
datas = [_get_attachment_from_part(service, messageId, p) for p in att_parts]
85-
dfs = [_convert_attachment_data_to_dataframe(d) for d in datas]
100+
dfs = [_convert_attachment_data_to_dataframe(d, t)
101+
for d, t in zip(datas, types)]
86102
return [{'emailsubject': subject, 'filename': f, 'data': d}
87103
for f, d in zip(filenames, dfs)]
88104

89105

90-
def query_for_csv_attachments(service, search_query):
106+
def query_for_csv_or_xl_attachments(service, search_query):
    """Collect the CSV / Excel attachments of every message matching
    `search_query` into a single flat list.

    Message ids come from `query_for_message_ids`; each message's
    attachments come from `get_csv_or_xl_attachments_from_msg_id` and are
    concatenated in message order."""
    return [
        attachment
        for msg_id in query_for_message_ids(service, search_query)
        for attachment in get_csv_or_xl_attachments_from_msg_id(service, msg_id)
    ]

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from setuptools import setup
22
from google_api import __version__
3-
setup(name='google_api',
3+
setup(name='google_api_rdw',
44
version=__version__,
55
description='Connect to the google api',
66
url='https://github.com/robertdavidwest/google_api',

0 commit comments

Comments
 (0)