Download A Csv File From Gmail Using Python
Solution 1:
TL;DR
I've put together a Github repo that makes getting CSV data from Gmail as simple as:
# Quick-start: fetch every CSV attachment from e-mails matching a search query.
# Both helpers come from the `gmail` module of the linked repository.
from gmail import get_gmail_service, query_for_csv_attachments

# get all attachments from e-mails containing 'test'
search_query = "test"

# Build the authenticated service once; the original snippet created it twice.
service = get_gmail_service()

csv_dfs = query_for_csv_attachments(service, search_query)
print(csv_dfs)
Follow the instructions in the
README
and feel free to contribute!
THE LONG ANSWER (directly using google-api-python-client
and oauth2client
)
- Follow this link and click the button: "ENABLE THE GMAIL API". After the setup you will download a file called
credentials.json
. Install the needed Python packages:
pip install --upgrade google-api-python-client oauth2client
The following code will allow you to connect to your Gmail account via Python:
# Authenticate against the Gmail API and build a `service` client object.
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools

# OAuth scope requested during the consent flow. Read-only access is enough
# for searching messages and downloading their attachments.
SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'
GMAIL_CREDENTIALS_PATH = 'credentials.json'  # downloaded
GMAIL_TOKEN_PATH = 'token.json'  # this will be created

# Reuse a previously stored token when there is one; otherwise run the
# interactive consent flow, which also saves the new token into `store`.
store = file.Storage(GMAIL_TOKEN_PATH)
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets(GMAIL_CREDENTIALS_PATH, SCOPES)
    creds = tools.run_flow(flow, store)

service = build('gmail', 'v1', http=creds.authorize(Http()))
With this service you can read your emails and any attachments.
First you can query your e-mails with a search string to find the IDs of the e-mails that have the attachments:
# Search the mailbox and collect the ids of all matching messages.
search_query = "ABCD"
result = service.users().messages().list(userId='me', q=search_query).execute()

# The response has no 'messages' key when nothing matches, so default to an
# empty list instead of raising KeyError.
# NOTE(review): only the first page of results is read here; follow
# 'nextPageToken' if the query can match more than one page of messages.
msgs = result.get('messages', [])
msg_ids = [msg['id'] for msg in msgs]
For each
messageId
you can find the associated attachments in the email. This part is a little messy, so bear with me. First we obtain a list of "attachment parts" (and attachment filenames). These are components of the email that contain attachments:
# Fetch one message and list the parts of it that are CSV attachments.
messageId = 'XYZ'
msg = service.users().messages().get(userId='me', id=messageId).execute()

# Flatten the MIME tree into its leaf parts. A message without a 'parts' list
# (not multipart) is treated as a single leaf, and nesting of any depth is
# walked; the original handled exactly one level and failed when 'parts' was
# missing.
all_parts = []
pending = [msg.get('payload') or {}]
while pending:
    node = pending.pop()
    children = node.get('parts')
    if children:
        # Reverse so that popping from the end visits children in order.
        pending.extend(reversed(children))
    else:
        all_parts.append(node)

att_parts = [p for p in all_parts if p.get('mimeType') == 'text/csv']
filenames = [p.get('filename') for p in att_parts]
Now we can obtain the attached CSV from each part:
# Collect the base64url-encoded content of every CSV part found earlier
# (`att_parts`). The original snippet referenced an undefined `part`.
messageId = 'XYZ'
encoded_attachments = []
for part in att_parts:
    # A part body either carries its content inline ('data') or carries an
    # 'attachmentId' that has to be fetched with a separate request.
    data = part['body'].get('data')
    attachmentId = part['body'].get('attachmentId')
    if not data:
        att = service.users().messages().attachments().get(
            userId='me', id=attachmentId, messageId=messageId).execute()
        data = att['data']
    encoded_attachments.append(data)
# After the loop `data` holds the content of the last CSV part, and
# `encoded_attachments` holds all of them in the same order as `att_parts`.
Now you have the CSV data, but it is base64url-encoded, so we decode it and load the result into a Pandas dataframe:
# Decode the base64url attachment content and parse it as CSV.
import base64
from io import BytesIO

import pandas as pd

# urlsafe_b64decode returns bytes, so hand pandas a binary buffer and let it
# do the text decoding (the Python 2 `StringIO` module no longer exists).
str_csv = base64.urlsafe_b64decode(data.encode('UTF-8'))
df = pd.read_csv(BytesIO(str_csv))
That's it! You now have a Pandas dataframe with the contents of the CSV attachment. You can work with this dataframe or write it to disk with
pd.DataFrame.to_csv
if you simply want to download it. You can use the list of filenames
obtained earlier if you want to preserve the filename.
Solution 2:
I got it. This is not entirely my own work: I found several code snippets, combined them, and modified them into the code below. In the end, it worked.
# Download every attachment from a Gmail account over IMAP into
# <detach_dir>/DataFiles. Files that already exist there are left untouched.
#
# Ported to Python 3: print() calls, bytes-aware e-mail parsing, explicit
# exceptions instead of a bare `raise`, and a context-managed IMAP session.
print('Proceeding')

import email
import getpass
import imaplib
import os
import sys

# NOTE(review): avoid hard-coding real credentials; Gmail may require an app
# password for IMAP logins -- confirm against your account settings.
userName = 'yourgmail@gmail.com'
passwd = 'yourpassword'
directory = '/full/path/to/the/directory'  # not read anywhere in this script
detach_dir = '.'

# Create the target folder inside detach_dir (the original always created it
# in the current working directory, whatever detach_dir was).
save_dir = os.path.join(detach_dir, 'DataFiles')
os.makedirs(save_dir, exist_ok=True)


def _require_ok(typ, message):
    """Raise RuntimeError(message) unless an IMAP call reported status 'OK'."""
    if typ != 'OK':
        raise RuntimeError(message)


try:
    # Leaving the `with` block logs out, even when an error interrupts the run.
    with imaplib.IMAP4_SSL('imap.gmail.com') as imapSession:
        typ, accountDetails = imapSession.login(userName, passwd)
        _require_ok(typ, 'Not able to sign in!')

        # The mailbox name contains a space, so it is sent quoted.
        typ, selectInfo = imapSession.select('"[Gmail]/All Mail"')
        _require_ok(typ, 'Not able to open the mailbox.')

        typ, data = imapSession.search(None, 'ALL')
        _require_ok(typ, 'Error searching Inbox.')

        for msgId in data[0].split():
            typ, messageParts = imapSession.fetch(msgId, '(RFC822)')
            _require_ok(typ, 'Error fetching mail.')

            emailBody = messageParts[0][1]
            mail = email.message_from_bytes(emailBody)
            for part in mail.walk():
                # Container parts and parts without a Content-Disposition
                # header are not attachments.
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue

                # The filename comes from the e-mail, i.e. untrusted input:
                # keep only its last component so it cannot escape save_dir.
                fileName = os.path.basename(part.get_filename() or '')
                if not fileName:
                    continue

                filePath = os.path.join(save_dir, fileName)
                if os.path.isfile(filePath):
                    continue

                payload = part.get_payload(decode=True)
                if payload is None:
                    continue
                print(fileName)
                with open(filePath, 'wb') as fp:
                    fp.write(payload)

        imapSession.close()
    print('Done')
except (imaplib.IMAP4.error, OSError, RuntimeError) as err:
    # Report the specific failure first, then the overall outcome.
    print(err)
    print('Not able to download all attachments.')
Solution 3:
An up-to-date answer has been provided at "Download attachment from mail using Python":
# Download every attachment from the inbox of an IMAP account using `imbox`.
import os
from imbox import Imbox # pip install imbox
import traceback

# enable less secure apps on your google account
# https://myaccount.google.com/lesssecureapps
# NOTE(review): Google has been retiring the "less secure apps" option; if the
# login is rejected, use an app password instead -- confirm for your account.

host = "imap.gmail.com"
username = "username"
password = 'password'
download_folder = "/path/to/download/folder"

# exist_ok=True already covers the "folder exists" case; no isdir() check needed.
os.makedirs(download_folder, exist_ok=True)

mail = Imbox(host, username=username, password=password, ssl=True, ssl_context=None, starttls=False)
try:
    messages = mail.messages() # defaults to inbox

    for (uid, message) in messages:
        mail.mark_seen(uid) # optional, mark message as read

        for idx, attachment in enumerate(message.attachments):
            try:
                # The filename is supplied by the sender: keep only its last
                # path component, and fall back to a generated name when the
                # attachment has none.
                att_fn = os.path.basename(attachment.get('filename') or '')
                if not att_fn:
                    att_fn = f"attachment_{idx}"
                download_path = os.path.join(download_folder, att_fn)
                print(download_path)
                with open(download_path, "wb") as fp:
                    fp.write(attachment.get('content').read())
            except Exception:
                # Best effort: report the failure and carry on with the next
                # attachment. print_exc() prints by itself and returns None.
                traceback.print_exc()
finally:
    # Always end the session, even if listing or downloading failed.
    mail.logout()


"""
Available Message filters:

# Gets all messages from the inbox
messages = mail.messages()

# Unread messages
messages = mail.messages(unread=True)

# Flagged messages
messages = mail.messages(flagged=True)

# Un-flagged messages
messages = mail.messages(unflagged=True)

# Messages sent FROM
messages = mail.messages(sent_from='sender@example.org')

# Messages sent TO
messages = mail.messages(sent_to='receiver@example.org')

# Messages received before specific date
messages = mail.messages(date__lt=datetime.date(2018, 7, 31))

# Messages received after specific date
messages = mail.messages(date__gt=datetime.date(2018, 7, 30))

# Messages received on a specific date
messages = mail.messages(date__on=datetime.date(2018, 7, 30))

# Messages whose subjects contain a string
messages = mail.messages(subject='Christmas')

# Messages from a specific folder
messages = mail.messages(folder='Social')
"""
Solution 4:
import os

from imap_tools import MailBox

# get all .csv attachments from INBOX and save them to files
download_dir = 'C:/1'

with MailBox('imap.my.ru').login('acc', 'pwd', 'INBOX') as mailbox:
    for msg in mailbox.fetch():
        for att in msg.attachments:
            # The filename is chosen by the sender: keep only its last path
            # component so a crafted name cannot write outside download_dir.
            safe_name = os.path.basename(att.filename or '')
            if safe_name.lower().endswith('.csv'):
                with open(os.path.join(download_dir, safe_name), 'wb') as f:
                    f.write(att.payload)
Post a Comment for "Download A Csv File From Gmail Using Python"