I'm attempting to pull email attachments from Outlook and store them in an S3 bucket in AWS. This is one of my first Python projects, and it's proving very difficult for me; the code is probably very messy.
# see https://github.com/AzureAD/microsoft-authentication-library-for-python/blob/dev/sample/device_flow_sample.py
# This authenticates for first time login.
# As long as you call acquire_token_silent before you invoke any graph APIs, the tokens will stay up to date.
# The refresh token is good for 90 days, and automatically updates. Once you login, the tokens will
# be updated and stored in the cache (and persisted to a file), and will stay alive more-or-less indefinitely
# (there are some things that can invalidate it on the server side).
from __future__ import with_statement
import io
import sys
import json
import logging
import os
import tarfile
import atexit
from wsgiref import headers
import requests
import msal
import boto3
import base64
from botocore.exceptions import ClientError
import codecs
print('Starting...')
# logging
logging.basicConfig(level=logging.DEBUG) # Enable DEBUG log for entire script
logging.getLogger("msal").setLevel(logging.INFO) # Optionally disable MSAL DEBUG logs
client=boto3.client('secretsmanager')
# config
config = dict(
authority = "https://login.microsoftonline.com/common",
client_id = '123456',
scope = ["Mail.ReadWrite"],
username = 'username',
cache_file = client.get_secret_value(SecretId="demo-ms-graph")['SecretBinary'],
endpoint = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$expand=attachments&$search="hasAttachments:true"'
)
# cache
cache = msal.SerializableTokenCache()
if os.path.exists(config["cache_file"]):
with tarfile.open('token.cache.tar.gz', "w:gz") as tar:
tar.add(config["cache_file"])
bts = open('token.cache.tar.gz','rb').read()
print("Length before",len(bts))
#sec=client.update_secret(SecretId="demo-ms-graph", SecretBinary=bts)
sec=client.get_secret_value(SecretId="demo-ms-graph")['SecretBinary']
print("Length after",len(sec))
with tarfile.open(fileobj=io.BytesIO(sec), mode='r:gz') as t:
d=t.extractfile('token.cache')
#print file content
print("File content",str(d.read()))
with tarfile.open(fileobj=io.BytesIO(sec), mode='r:gz') as t:
d=t.extractfile('token.cache')
# app
app = msal.PublicClientApplication(
config["client_id"], authority=config["authority"],
token_cache=cache)
print('Connecting to app..')
# exists?
result = None
accounts = app.get_accounts()
if accounts:
logging.info("found accounts in the app")
for a in accounts:
print(a)
if a["username"] == config["username"]:
result = app.acquire_token_silent(config["scope"], account=a)
break
if result and "access_token" in result:
# Calling graph using the access token
graph_data = requests.get( # Use token to call downstream service
config["endpoint"],
headers={'Authorization': 'Bearer ' + result['access_token']},).json()
#print("Graph API call result: %s" % json.dumps(graph_data, indent=2))
else:
print(result.get("error"))
print(result.get("error_description"))
print(result.get("correlation_id")) # You may need this when reporting a bug
main = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$expand=attachments&$search="hasAttachments:true"'
response = requests.get(main, headers={'Authorization': 'Bearer ' + result['access_token']})
if response.status_code != 200:
raise Exception(response.json())
response_json = response.json()
print('Starting upload...')
emails = response_json['value']
s3 = boto3.client('s3')
bucket ='demo-email-app'
for email in emails:
email_id = email['id']
subject = email['subject']
if email['hasAttachments']:
print(subject)
attachments = email['attachments']
for attachment in attachments:
name = attachment['name']
fileContent = json.dumps(email, indent=2)
s3.put_object(Bucket=bucket, Key=name.replace('.', '_') + '.json', Body=fileContent.encode('UTF-8'))
print('Upload Complete')
#download_email_attachments(email_id, headers)
print('All uploads complete')
My secret is stored as binary in Secrets Manager, and the script seems to pull the secret and print it fine. I'm running into an error in the # exists? section: "AttributeError: 'NoneType' object has no attribute 'get'".
Am I approaching this the wrong way? I have the token stored in Secrets Manager and am trying to retrieve it for MSAL to use to authenticate my user against the MS Graph API, so I can pull attachments from the Outlook account and store them in an S3 bucket.
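For reference, the device_flow_sample.py linked at the top of the script keeps the cache alive by deserializing the saved state into the SerializableTokenCache before building the app, and re-serializing it on exit. Here is a minimal sketch of that pattern adapted to Secrets Manager, assuming (unlike the code above) the serialized cache is stored directly as a SecretString rather than a tar.gz SecretBinary; the secret name and client_id are the ones from the question:
import atexit
import boto3
import msal
secrets = boto3.client('secretsmanager')
cache = msal.SerializableTokenCache()
# Load the previously saved cache state, if any.
try:
    cache.deserialize(secrets.get_secret_value(SecretId="demo-ms-graph")['SecretString'])
except secrets.exceptions.ResourceNotFoundException:
    pass  # first run, nothing cached yet
# Persist the cache on exit, but only if MSAL actually changed it during this run.
atexit.register(lambda: secrets.update_secret(
    SecretId="demo-ms-graph", SecretString=cache.serialize())
    if cache.has_state_changed else None)
app = msal.PublicClientApplication(
    '123456', authority="https://login.microsoftonline.com/common",
    token_cache=cache)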
Related
I'm trying to send an email with an attachment (ideally multiple attachments) that are larger than 10 MB and smaller than the limit of 25 MB in total. The reason I mention 10 MB is because it seems to be the lower bound for when the normal way of attaching files stops working and you get Error 10053.
I've read in the documentation that the best way to do this is the resumable upload method, but I haven't been able to get it to work, nor have I been able to find any good examples in Python. Most of the SO questions on this simply link back to the documentation, which doesn't have a Python example, or their code resulted in other errors.
I'm looking for an explanation in Python because I want to make sure I understand it correctly.
Questions I've looked through:
Attaching a file using Resumable upload w/ Gmail API
Gmail Api resumable upload Rest( attachment larger than 5MB)
using /upload urls with Google API client
How to upload large messages to Gmail
Error 10053 When Sending Large Attachments using Gmail API
Sending email via gmail & python
MIMEMultipart, MIMEText, MIMEBase, and payloads for sending email with file attachment in Python
Code:
import base64
import json
import os
from email import utils, encoders
from email.message import EmailMessage
from email.mime import application, multipart, text, base, image, audio
import mimetypes
from apiclient import errors
from googleapiclient import discovery, http
from google.oauth2 import service_account
def send_email(email_subject, email_body, email_sender='my_service_account@gmail.com', email_to='', email_cc='', email_bcc='', files=None):
# Getting credentials
with open(os.environ.get('SERVICE_KEY_PASSWORD')) as f:
service_account_info = json.loads(f.read())
# Define which scopes we're trying to access
SCOPES = ['https://www.googleapis.com/auth/gmail.send']
# Setting up credentials using the gmail api
credentials = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
# This allows us to assign an alias account to the message so that the messages aren't coming from 'ServiceDriod-8328balh blah blah'
delegated_credentials = credentials.with_subject(email_sender)
# 'Building' the service instance using the credentials we've passed
service = discovery.build(serviceName='gmail', version='v1', credentials=delegated_credentials)
# Building out the email
message = multipart.MIMEMultipart()
message['to'] = email_to
message['from'] = email_sender
message['date'] = utils.formatdate(localtime=True)
message['subject'] = email_subject
message['cc'] = email_cc
message['bcc'] = email_bcc
message.attach(text.MIMEText(email_body, 'html'))
for f in files or []:
mimetype, encoding = mimetypes.guess_type(f)
# If the extension is not recognized it will return: (None, None)
# If it's an .mp3, it will return: (audio/mp3, None) (None is for the encoding)
# For an unrecognized extension we set mimetype to 'application/octet-stream' so it won't return None again.
if mimetype is None or encoding is not None:
mimetype = 'application/octet-stream'
main_type, sub_type = mimetype.split('/', 1)
# Creating the attachement:
# This part is used to tell how the file should be read and stored (r, or rb, etc.)
if main_type == 'text':
print('text')
with open(f, 'rb') as outfile:
attachement = text.MIMEText(outfile.read(), _subtype=sub_type)
elif main_type == 'image':
print('image')
with open(f, 'rb') as outfile:
attachement = image.MIMEImage(outfile.read(), _subtype=sub_type)
elif main_type == 'audio':
print('audio')
with open(f, 'rb') as outfile:
attachement = audio.MIMEAudio(outfile.read(), _subtype=sub_type)
elif main_type == 'application' and sub_type == 'pdf':
with open(f, 'rb') as outfile:
attachement = application.MIMEApplication(outfile.read(), _subtype=sub_type)
else:
attachement = base.MIMEBase(main_type, sub_type)
with open(f, 'rb') as outfile:
attachement.set_payload(outfile.read())
encoders.encode_base64(attachement)
attachement.add_header('Content-Disposition', 'attachment', filename=os.path.basename(f))
message.attach(attachement)
media_body = http.MediaFileUpload(files[0], chunksize=500, resumable=True)
print('Uploading large file...')
body = {'raw': base64.urlsafe_b64encode(message.as_bytes()).decode()}
message = (service.users().messages().send(userId='me', body=body, media_body=media_body).execute())
Note: Right now, in the MediaFileUpload I'm using files[0] because I'm only using one file for testing and I just wanted to attach one file for now until it works.
Error:
Exception has occurred: ResumableUploadError
<HttpError 400 "Bad Request">
File "C:\Users\CON01599\AppData\Local\Continuum\anaconda3\Lib\site-packages\googleapiclient\http.py", line 927, in next_chunk
raise ResumableUploadError(resp, content)
File "C:\Users\CON01599\AppData\Local\Continuum\anaconda3\Lib\site-packages\googleapiclient\_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\CON01599\AppData\Local\Continuum\anaconda3\Lib\site-packages\googleapiclient\http.py", line 822, in execute
_, body = self.next_chunk(http=http, num_retries=num_retries)
File "C:\Users\CON01599\AppData\Local\Continuum\anaconda3\Lib\site-packages\googleapiclient\_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\CON01599\Documents\GitHub\pipelines\components\email\send_email.py", line 105, in send_email
message = (service.users().messages().send(userId='me', body=body, media_body=media_body).execute())
Answer:
import base64
import io
import json
import os
from email import utils, encoders
from email.message import EmailMessage
from email.mime import application, multipart, text, base, image, audio
import mimetypes
from apiclient import errors
from googleapiclient import discovery, http
from google.oauth2 import service_account
def get_environment_variables():
""" Retrieves the environment variables and returns them in
a dictionary object.
"""
env_var_dict = {
'to': os.environ.get('TO'),
'subject': os.environ.get('SUBJECT'),
'body': os.environ.get('BODY'),
'file': os.environ.get('FILE')
}
return env_var_dict
def send_email(email_subject, email_body, email_sender='my_service_account@gmail.com', email_to='', email_cc='', email_bcc='', files=None):
# Pulling in the string value of the service key from the parameter
with open(os.environ.get('SERVICE_KEY_PASSWORD')) as f:
service_account_info = json.loads(f.read())
# Define which scopes we're trying to access
SCOPES = ['https://www.googleapis.com/auth/gmail.send']
# Setting up credentials using the gmail api
credentials = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
# This allows us to assign an alias account to the message so that the messages aren't coming from 'ServiceDriod-8328balh blah blah'
delegated_credentials = credentials.with_subject(email_sender)
# 'Building' the service instance using the credentials we've passed
service = discovery.build(serviceName='gmail', version='v1', credentials=delegated_credentials)
# Building out the email
message = multipart.MIMEMultipart()
message['to'] = email_to
message['from'] = email_sender
message['date'] = utils.formatdate(localtime=True)
message['subject'] = email_subject
message['cc'] = email_cc
message['bcc'] = email_bcc
message.attach(text.MIMEText(email_body, 'html'))
for f in files or []:
f = f.strip(' ')
mimetype, encoding = mimetypes.guess_type(f)
# If the extension is not recognized it will return: (None, None)
# If it's an .mp3, it will return: (audio/mp3, None) (None is for the encoding)
# For an unrecognized extension we set mimetype to 'application/octet-stream' so it won't return None again.
if mimetype is None or encoding is not None:
mimetype = 'application/octet-stream'
main_type, sub_type = mimetype.split('/', 1)
# Creating the attachement:
# This part is used to tell how the file should be read and stored (r, or rb, etc.)
if main_type == 'text':
print('text')
with open(f, 'rb') as outfile:
attachement = text.MIMEText(outfile.read(), _subtype=sub_type)
elif main_type == 'image':
print('image')
with open(f, 'rb') as outfile:
attachement = image.MIMEImage(outfile.read(), _subtype=sub_type)
elif main_type == 'audio':
print('audio')
with open(f, 'rb') as outfile:
attachement = audio.MIMEAudio(outfile.read(), _subtype=sub_type)
elif main_type == 'application' and sub_type == 'pdf':
with open(f, 'rb') as outfile:
attachement = application.MIMEApplication(outfile.read(), _subtype=sub_type)
else:
attachement = base.MIMEBase(main_type, sub_type)
with open(f, 'rb') as outfile:
attachement.set_payload(outfile.read())
encoders.encode_base64(attachement)
attachement.add_header('Content-Disposition', 'attachment', filename=os.path.basename(f))
message.attach(attachement)
media_body = http.MediaIoBaseUpload(io.BytesIO(message.as_bytes()), mimetype='message/rfc822', resumable=True)
body_metadata = {} # no thread, no labels in this example
try:
print('Uploading file...')
response = service.users().messages().send(userId='me', body=body_metadata, media_body=media_body).execute()
print(response)
except errors.HttpError as error:
print('An error occurred when sending the email:\n{}'.format(error))
if __name__ == '__main__':
env_var_dict = get_environment_variables()
print("Sending email...")
send_email(email_subject=env_var_dict['subject'],
email_body=env_var_dict['body'],
email_to=env_var_dict['to'],
files=env_var_dict['file'].split(','))
print("Email sent!")
The issue you're having here is that your MediaUpload is a single attachment.
Instead of uploading a single attachment as a resumable MediaUpload, you need to upload the entire RFC822 message as a resumable MediaUpload.
In other words:
import ...
...
from io import BytesIO
from googleapiclient.http import MediaIoBaseUpload
SCOPES = [ 'scopes' ]
creds = get_credentials_somehow()
gmail = get_authed_service_somehow()
msg = create_rfc822_message(headers, email_body)
to_attach = get_attachment_paths_from_dir('../reports/tps/memos/2019/04')
add_attachments(msg, to_attach)
media = MediaIoBaseUpload(BytesIO(msg.as_bytes()), mimetype='message/rfc822', resumable=True)
body_metadata = {} # no thread, no labels in this example
resp = gmail.users().messages().send(userId='me', body=body_metadata, media_body=media).execute()
print(resp)
# { "id": "some new id", "threadId": "some new thread id", "labelIds": ["SENT"]}
I pieced this together from your provided code, reviewing this GitHub issue and Google's Inbox-to-Gmail email importer, specifically this bit.
When sending replies to existing messages, you will almost certainly have some sort of metadata that you should provide to help Gmail keep track of your new response and the original conversation. Namely, instead of an empty body parameter, you would pass informative metadata such as
body_metadata = { 'labelIds': [
"your label id here",
"another label id" ],
'threadId': "some thread id you took from the message you're replying to"
}
Other good refs:
API Client's Gmail PyDoc
Actual code used
You mention the attachment being larger than 10 MB, but you don't mention it being smaller than 25 MB: Gmail limits attachments to 25 MB, so if your attachment exceeds that, there's simply no way to do this, as it's beyond Gmail's limits.
The explanation can be found here.
Can you confirm that your attachment is not too large?
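If it helps, here's a quick sanity check you could run before building the message (my own sketch, assuming files is the same list of local paths passed to send_email):
import os
MAX_TOTAL_BYTES = 25 * 1024 * 1024  # Gmail's overall message size limit
# Note: base64 encoding inflates the final message by roughly a third on top of this.
total = sum(os.path.getsize(f) for f in files)
if total > MAX_TOTAL_BYTES:
    raise ValueError('Attachments total %.1f MB, which exceeds the 25 MB Gmail limit'
                     % (total / (1024 * 1024)))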
I'm trying to download all the media that is sent to my Twilio account and cannot for the life of me figure out how to access the actual images.
from twilio.rest import Client
import requests
from operator import itemgetter
import json
ACCOUNT_SID = "xxxxxxx"
AUTH_TOKEN = "xxxxxxxx"
client = Client(ACCOUNT_SID, AUTH_TOKEN)
# builds a list of messages and media uris
messages = client.messages.list(from_="+19999999999")
msgs = []
for m in messages:
line = [m.from_, m.to, m.body, m.sid, m.subresource_uris['media']]
line = [str(x) for x in line]
msgs.append(line)
# with list of all messages:
msgs = sorted(msgs, key=itemgetter(0))
for m in msgs:
# get media list for each message that has one, else catch exception
try:
medias = client.messages(m[3]).media.list()
# returns Twilio.Api.V2010.MediaInstance and i'm stuck
for med in medias:
print client.messages(m[3]).media(med.sid).fetch()
except Exception as e:
pass
I am just lost and can't find any concrete examples in the documentation. I really can't even tell if I'm close, or waaaaaaaaaaay off. Thanks in advance!
SOLUTION Thanks to philnash
from twilio.rest import Client
import requests
import json
# Find these values at https://twilio.com/user/account
ACCOUNT_SID = "xxxxx"
AUTH_TOKEN = "xxxxxx"
BASE_URL = "https://%s:%s#api.twilio.com" % (ACCOUNT_SID, AUTH_TOKEN)
client = Client(ACCOUNT_SID, AUTH_TOKEN)
# with list of all messages:
messages = client.messages.list(from_="+1999999999")
for m in messages:
sid = m.sid
# get media list for each message that has one, else catch exception
try:
message = client.messages(sid).fetch()
print message.body
medias = message.media.list()
# returns Twilio.Api.V2010.MediaInstance and i'm stuck
for media in medias:
media_instance = client.messages(sid).media(media.sid).fetch()
uri = requests.get(BASE_URL + media_instance.uri).json()
uri2 = requests.get(BASE_URL + uri['uri'].replace('.json', ''))
with open(media_instance.uri.split("/")[-1].replace(".json", ".png"), "wb") as f:
f.write(uri2.content)
f.close()
except Exception as e:
print e
Twilio developer evangelist here.
When you get the Media URI from the helper library, it is the json representation of the resource and ends in .json. To get the raw resource you need only to strip the .json extension. You can use that URL to download the image.
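In other words, something like this minimal sketch (ACCOUNT_SID and AUTH_TOKEN as above; the .png extension is just illustrative, since the media could be another content type):
import requests
from twilio.rest import Client
client = Client(ACCOUNT_SID, AUTH_TOKEN)
for message in client.messages.list(from_="+19999999999"):
    for media in client.messages(message.sid).media.list():
        # media.uri points at the JSON representation; drop the extension to get the raw bytes
        media_url = "https://api.twilio.com" + media.uri.replace(".json", "")
        response = requests.get(media_url, auth=(ACCOUNT_SID, AUTH_TOKEN))
        with open(media.sid + ".png", "wb") as f:
            f.write(response.content)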
Is it possible to upload a file to the Shared Documents library of a Microsoft SharePoint site with the Python OneDrive SDK?
This documentation says it should be (in the first sentence), but I can't make it work.
I'm able to authenticate (with Azure AD) and upload to a OneDrive folder, but when trying to upload to a SharePoint folder, I keep getting this error:
"Exception of type 'Microsoft.IdentityModel.Tokens.AudienceUriValidationFailedException' was thrown."
The code I'm using that returns an object with the error:
(...authentication...)
client = onedrivesdk.OneDriveClient('https://{tenant}.sharepoint.com/{site}/_api/v2.0/', auth, http)
client.item(path='/drive/special/documents').children['test.xlsx'].upload('test.xlsx')
I can successfully upload to https://{tenant}-my.sharepoint.com/_api/v2.0/ (notice the "-my" after the {tenant}) with the following code:
client = onedrivesdk.OneDriveClient('https://{tenant}-my.sharepoint.com/_api/v2.0/', auth, http)
returned_item = client.item(drive='me', id='root').children['test.xlsx'].upload('test.xlsx')
How could I upload the same file to a SharePoint site?
(Answers to similar questions (1,2,3,4) on Stack Overflow are either too vague or suggest using a different API. My question is if it's possible using the OneDrive Python SDK, and if so, how to do it.)
Update: Here is my full code and output. (Sensitive original data replaced with similarly formatted gibberish.)
import re
import onedrivesdk
from onedrivesdk.helpers.resource_discovery import ResourceDiscoveryRequest
# our domain (not the original)
redirect_uri = 'https://example.ourdomain.net/'
# our client id (not the original)
client_id = "a1234567-1ab2-1234-a123-ab1234abc123"
# our client secret (not the original)
client_secret = 'ABCaDEFGbHcd0e1I2fghJijkL3mn4M5NO67P8Qopq+r='
resource = 'https://api.office.com/discovery/'
auth_server_url = 'https://login.microsoftonline.com/common/oauth2/authorize'
auth_token_url = 'https://login.microsoftonline.com/common/oauth2/token'
http = onedrivesdk.HttpProvider()
auth = onedrivesdk.AuthProvider(http_provider=http, client_id=client_id,
auth_server_url=auth_server_url,
auth_token_url=auth_token_url)
should_authenticate_via_browser = False
try:
# Look for a saved session. If not found, we'll have to
# authenticate by opening the browser.
auth.load_session()
auth.refresh_token()
except FileNotFoundError as e:
should_authenticate_via_browser = True
pass
if should_authenticate_via_browser:
auth_url = auth.get_auth_url(redirect_uri)
code = ''
while not re.match(r'[a-zA-Z0-9_-]+', code):
# Ask for the code
print('Paste this URL into your browser, approve the app\'s access.')
print('Copy the resulting URL and paste it below.')
print(auth_url)
code = input('Paste code here: ')
# Parse code from URL if necessary
if re.match(r'.*?code=([a-zA-Z0-9_-]+).*', code):
code = re.sub(r'.*?code=([a-zA-Z0-9_-]*).*', r'\1', code)
auth.authenticate(code, redirect_uri, client_secret, resource=resource)
# If you have access to more than one service, you'll need to decide
# which ServiceInfo to use instead of just using the first one, as below.
service_info = ResourceDiscoveryRequest().get_service_info(auth.access_token)[0]
auth.redeem_refresh_token(service_info.service_resource_id)
auth.save_session() # Save session into a local file.
# Doesn't work
client = onedrivesdk.OneDriveClient(
'https://{tenant}.sharepoint.com/sites/{site}/_api/v2.0/', auth, http)
returned_item = client.item(path='/drive/special/documents') \
    .children['test.xlsx'] \
    .upload('test.xlsx')
print(returned_item._prop_dict['error_description'])
# Works, uploads to OneDrive instead of SharePoint site
client2 = onedrivesdk.OneDriveClient(
'https://{tenant}-my.sharepoint.com/_api/v2.0/', auth, http)
returned_item2 = client2.item(drive='me', id='root') \
    .children['test.xlsx'] \
    .upload('test.xlsx')
print(returned_item2.web_url)
Output:
Exception of type 'Microsoft.IdentityModel.Tokens.AudienceUriValidationFailedException' was thrown.
https://{tenant}-my.sharepoint.com/personal/user_domain_net/_layouts/15/WopiFrame.aspx?sourcedoc=%1ABCDE2345-67F8-9012-3G45-6H78IJKL9M01%2N&file=test.xlsx&action=default
I finally found a solution, with the help of (SO user) sytech.
The answer to my original question is that, using the original Python OneDrive SDK, it's not possible (at the time of writing) to upload a file to the Shared Documents folder of a SharePoint Online site: when the SDK queries the resource discovery service, it drops all services whose service_api_version is not v2.0. In my case the SharePoint service is reported as v1.0, so it gets dropped, even though it could also be accessed using API v2.0.
However, by extending the ResourceDiscoveryRequest class (in the OneDrive SDK), we can create a workaround for this. I managed to upload a file this way:
import json
import re
import onedrivesdk
import requests
from onedrivesdk.helpers.resource_discovery import ResourceDiscoveryRequest, \
ServiceInfo
# our domain (not the original)
redirect_uri = 'https://example.ourdomain.net/'
# our client id (not the original)
client_id = "a1234567-1ab2-1234-a123-ab1234abc123"
# our client secret (not the original)
client_secret = 'ABCaDEFGbHcd0e1I2fghJijkL3mn4M5NO67P8Qopq+r='
resource = 'https://api.office.com/discovery/'
auth_server_url = 'https://login.microsoftonline.com/common/oauth2/authorize'
auth_token_url = 'https://login.microsoftonline.com/common/oauth2/token'
# our sharepoint URL (not the original)
sharepoint_base_url = 'https://{tenant}.sharepoint.com/'
# our site URL (not the original)
sharepoint_site_url = sharepoint_base_url + 'sites/{site}'
file_to_upload = 'C:/test.xlsx'
target_filename = 'test.xlsx'
class AnyVersionResourceDiscoveryRequest(ResourceDiscoveryRequest):
def get_all_service_info(self, access_token, sharepoint_base_url):
headers = {'Authorization': 'Bearer ' + access_token}
response = json.loads(requests.get(self._discovery_service_url,
headers=headers).text)
service_info_list = [ServiceInfo(x) for x in response['value']]
# Get all services, not just the ones with service_api_version 'v2.0'
# Filter only on service_resource_id
sharepoint_services = \
[si for si in service_info_list
if si.service_resource_id == sharepoint_base_url]
return sharepoint_services
http = onedrivesdk.HttpProvider()
auth = onedrivesdk.AuthProvider(http_provider=http, client_id=client_id,
auth_server_url=auth_server_url,
auth_token_url=auth_token_url)
should_authenticate_via_browser = False
try:
# Look for a saved session. If not found, we'll have to
# authenticate by opening the browser.
auth.load_session()
auth.refresh_token()
except FileNotFoundError as e:
should_authenticate_via_browser = True
pass
if should_authenticate_via_browser:
auth_url = auth.get_auth_url(redirect_uri)
code = ''
while not re.match(r'[a-zA-Z0-9_-]+', code):
# Ask for the code
print('Paste this URL into your browser, approve the app\'s access.')
print('Copy the resulting URL and paste it below.')
print(auth_url)
code = input('Paste code here: ')
# Parse code from URL if necessary
if re.match(r'.*?code=([a-zA-Z0-9_-]+).*', code):
code = re.sub(r'.*?code=([a-zA-Z0-9_-]*).*', r'\1', code)
auth.authenticate(code, redirect_uri, client_secret, resource=resource)
service_info = AnyVersionResourceDiscoveryRequest().\
get_all_service_info(auth.access_token, sharepoint_base_url)[0]
auth.redeem_refresh_token(service_info.service_resource_id)
auth.save_session()
client = onedrivesdk.OneDriveClient(sharepoint_site_url + '/_api/v2.0/',
auth, http)
# Get the drive ID of the Documents folder.
documents_drive_id = [x['id']
for x
in client.drives.get()._prop_list
if x['name'] == 'Documents'][0]
items = client.item(drive=documents_drive_id, id='root')
# Upload file
uploaded_file_info = items.children[target_filename].upload(file_to_upload)
Authenticating for a different service gives you a different token.
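In other words, it's the redeem_refresh_token call against the SharePoint resource (rather than the discovery resource) that makes the resulting access token acceptable to the SharePoint endpoint. A trimmed illustration of the relevant lines from the code above (tenant/site placeholders as before):
# Redeem the refresh token for the SharePoint resource itself, then build the client.
auth.redeem_refresh_token('https://{tenant}.sharepoint.com/')
client = onedrivesdk.OneDriveClient(
    'https://{tenant}.sharepoint.com/sites/{site}/_api/v2.0/', auth, http)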
I've managed to setup an API Gateway secured with Cognito. The unauthenticated user role has an access policy that should grant it access to the gateway. I've also managed to use boto3 to retrieve an identity ID from the pool and obtain the associated open ID token, as well as the associated secret and access keys.
How do I now make a call to the gateway using these credentials? Is there a way to use boto3 to handle signing a request to a particular method on the API?
My code is based largely on the questioner's own answer, but I've tried to make it clearer where all the values come from.
import boto3
import requests
from requests_aws4auth import AWS4Auth
# Use 'pip install boto3 requests requests-aws4auth' to get these
region_name = 'ap-southeast-2' # or 'us-west-1' or whatever
# 12 decimal digits from your AWS login page
account_id = '123456789012'
# I've only found this in the sample code for other languages, e.g. JavaScript
# Services→Cognito→Manage Federated Identities→(your-id-pool)→Sample code
identity_pool_id = 'ap-southeast-2:fedcba98-7654-3210-1234-56789abcdef0'
# Create a new identity
boto3.setup_default_session(region_name = region_name)
identity_client = boto3.client('cognito-identity', region_name=region_name)
identity_response = identity_client.get_id(AccountId=account_id,
IdentityPoolId=identity_pool_id)
# We normally wouldn't log this, but to illustrate:
identity_id = identity_response['IdentityId']
print ('identity_id:', identity_id) # good idea not to log this
# Get the identity's credentials
credentials_response = identity_client.get_credentials_for_identity(IdentityId=identity_id)
credentials = credentials_response['Credentials']
access_key_id = credentials['AccessKeyId']
secret_key = credentials['SecretKey']
service = 'execute-api'
session_token = credentials['SessionToken']
expiration = credentials['Expiration']
# Again, we normally wouldn't log this:
print ('access_key_id', access_key_id)
print ('secret_key', secret_key)
print ('session_token', session_token)
print ('expiration', expiration)
# The access_key_id will look something like 'AKIABC123DE456FG7890', similar to
# Services→IAM→Users→(AWS_USER_NAME)→Security credentials→Access key ID
# Get the authorisation object
auth = AWS4Auth(access_key_id, secret_key, region_name, service,
session_token=session_token)
# Just an illustration again:
print ('auth: %(service)s(%(date)s) %(region)s:%(access_id)s' % auth.__dict__)
# We'll use that object to send a request to our app. This app doesn't
# exist in real life, though, so you'll need to edit the following quite
# heavily:
# Services→Cognito→Manage your User Pools→(your-user-pool)→Apps→App name
app_name = 'my-app-name'
api_path = 'dev/helloworld'
method = 'GET'
headers = {}
body = ''
url = 'https://%s.%s.%s.amazonaws.com/%s' % (app_name, service, region_name,
api_path)
response = requests.request(method, url, auth=auth, data=body, headers=headers)
The following code (and the requests-aws4auth library) did the job:
import boto3
import datetime
import json
from requests_aws4auth import AWS4Auth
import requests
boto3.setup_default_session(region_name='us-east-1')
identity = boto3.client('cognito-identity', region_name='us-east-1')
account_id='XXXXXXXXXXXXXXX'
identity_pool_id='us-east-1:YYY-YYYY-YYY-YY'
api_prefix='ZZZZZZZZZ'
response = identity.get_id(AccountId=account_id, IdentityPoolId=identity_pool_id)
identity_id = response['IdentityId']
print ("Identity ID: %s"%identity_id)
resp = identity.get_credentials_for_identity(IdentityId=identity_id)
secretKey = resp['Credentials']['SecretKey']
accessKey = resp['Credentials']['AccessKeyId']
sessionToken = resp['Credentials']['SessionToken']
expiration = resp['Credentials']['Expiration']
print ("\nSecret Key: %s"%(secretKey))
print ("\nAccess Key %s"%(accessKey))
print ("\nSession Token: %s"%(sessionToken))
print ("\nExpiration: %s"%(expiration))
method = 'GET'
headers = {}
body = ''
service = 'execute-api'
url = 'https://%s.execute-api.us-east-1.amazonaws.com/dev/helloworld' % api_prefix
region = 'us-east-1'
auth = AWS4Auth(accessKey, secretKey, region, service, session_token=sessionToken)
response = requests.request(method, url, auth=auth, data=body, headers=headers)
print(response.text)
The following code works really well; hope it helps:
from pprint import pprint
import requests
from pycognito import Cognito
USER_POOL_ID = 'eu-central-1_XXXXXXXXXXX'
CLIENT_ID = 'XXXXXXXXXXXX'
CLIENT_SECRET = 'XXXXXXXXXXX'
u = Cognito(USER_POOL_ID,CLIENT_ID, client_secret=CLIENT_SECRET, username='cognito user name')
u.authenticate('cognito user password')
id_token = u.id_token
headers = {'Authorization': 'Bearer ' + id_token}
api_url = 'https://XXXXXXXXXXX.execute-api.eu-central-1.amazonaws.com/stage/XXXXXXXXXXX'
r = requests.get(api_url, headers=headers)
pprint(dict(r.headers))
print(r.status_code)
print(r.text)
Here is an example from our public docs: http://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html
Cognito creds are no different than any other temporary creds, and the signing process is also the same. If you want to move back to Python the example above should be good, or I would guess that there are third-party libraries out there to do the signature for you.
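If you'd rather not add requests-aws4auth, botocore (which boto3 already depends on) can do the SigV4 signing for you. A rough sketch with a placeholder API URL and region, reusing the temporary Cognito credentials (access_key_id, secret_key, session_token) retrieved in the answers above:
import requests
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
from botocore.credentials import Credentials
creds = Credentials(access_key_id, secret_key, token=session_token)
url = 'https://abc123.execute-api.us-east-1.amazonaws.com/dev/helloworld'  # placeholder API URL
# Build an unsigned request, let botocore add the SigV4 headers, then send it with requests.
request = AWSRequest(method='GET', url=url, data='')
SigV4Auth(creds, 'execute-api', 'us-east-1').add_auth(request)
response = requests.get(url, headers=dict(request.headers))
print(response.text)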
How to get the identity_pool_id:
If you don't already have a federated identity pool that gives you an identity_pool_id, running the code below will list your identity pools and their IDs.
import boto3
boto3.setup_default_session(
aws_access_key_id='AKIAJ7TBC72BPWNEWIDQ',
aws_secret_access_key='rffjcaSHLjXMZ9vj9Lyir/QXoWc6Bg1JE/bcHIu6',
region_name='ap-southeast-2')
client = boto3.client('cognito-identity')
response = client.list_identity_pools(MaxResults=3,)
print("IdentityPoolId-- ", response)
I've followed the directions in https://developers.google.com/accounts/docs/OAuth2ServiceAccount to use a service account to authenticate to the Google Cloud Storage API. I tried to send a JWT to Google's authentication servers in Python, but got an error:
urllib2.HTTPError: HTTP Error 400: Bad Request
It looks like there's something wrong with the way I'm making, signing, or sending the JWT? The error wasn't specific so it could be any part of the process. Does anyone have any ideas?
import Crypto.PublicKey.RSA as RSA
import Crypto.Hash.SHA as SHA
import Crypto.Signature.PKCS1_v1_5 as PKCS1_v1_5
import base64
import json
import time
import urllib2
import urllib
# Settings
json_key_file = 'GooglePM-9f75ad112f87-service.json'
# Load the private key associated with the Google service account
with open(json_key_file) as json_file:
json_data = json.load(json_file)
key = RSA.importKey(json_data['private_key'])
# Create an PKCS1_v1_5 object
signer = PKCS1_v1_5.new(key)
# Encode the JWT header
header_b64 = base64.urlsafe_b64encode(json.dumps({'alg':'RS256','typ':'JWT'}))
# JWT claims
jwt = {
'iss': json_data['client_email'],
'scope': 'https://www.googleapis.com/auth/devstorage.read_write',
'aud': 'https://accounts.google.com/o/oauth2/token',
'exp': int(time.time())+3600,
'iat': int(time.time())
}
jwt_json = json.dumps(jwt)
# Encode the JWT claims
jwt_json_b64 = base64.urlsafe_b64encode(jwt_json)
# Sign the JWT header and claims
msg_hash = SHA.new(header_b64 + "." + jwt_json_b64)
signature_b64 = base64.urlsafe_b64encode(signer.sign(msg_hash))
# Make the complete message
jwt_complete = header_b64 + "." + jwt_json_b64 + "." + signature_b64
data = {'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
'assertion': jwt_complete}
f = urllib2.urlopen("https://accounts.google.com/o/oauth2/token", urllib.urlencode(data))
print f.read()
If I try to use curl to post to the server, I get the invalid grants error:
(venv)$ curl -d 'grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer&assertion=eyJhbGciOiAiUlMyNTYiLCAidHlwIjogIkpXVCJ9.eyJpc3MiOiAiMTM1MDY3NjIyMTk4LWVhbWUwZnFqdTNvamRoZ29zdDg2dnBpdTBsYW91NnZlQGRldmVsb3Blci5nc2VydmljZWFjY291bnQuY29tIiwgInNjb3BlIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL2F1dGgvZGV2c3RvcmFnZS5yZWFkX3dyaXRlIiwgImF1ZCI6ICJodHRwczovL2FjY291bnRzLmdvb2dsZS5jb20vby9vYXV0aDIvdG9rZW4iLCAiZXhwIjogMTQwODY1MTU2OCwgImlhdCI6IDE0MDg2NDg1NTh9.HWC7h3QiOy7QsSuta4leq_Gjwmy9IdF-MUwflPhiohzAJ-Amykd56Ye4Y_Saf_sAc5STzOCmrSPzOTYvGXr6X_T_AmSTxXK2AJ2SpAiEUs2_Wp5h18xTUY3Y_hkKvSZLh5bRzeJ_0xRcmRIPE6tua0FHFwUDdnCIGdh4DGg6i4E%3D' https://accounts.google.com/o/oauth2/token
{
"error" : "invalid_grant"
}
OK, so there's a better way to do this! Google already has a Python client API that handles some of the complexity. The following code works after installing the Google Python client API: https://developers.google.com/api-client-library/python/guide/aaa_oauth
from oauth2client.client import SignedJwtAssertionCredentials
import json
import urllib
import urllib2
# Settings
json_key_file = 'GooglePM-9f75ad112f87-service.json'
# Load the private key associated with the Google service account
with open(json_key_file) as json_file:
json_data = json.load(json_file)
# Get and sign JWT
credential = SignedJwtAssertionCredentials(json_data['client_email'], json_data['private_key'], 'https://www.googleapis.com/auth/devstorage.read_write')
jwt_complete = credential._generate_assertion()
# Get token from server
data = {'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
'assertion': jwt_complete}
f = urllib2.urlopen("https://accounts.google.com/o/oauth2/token", urllib.urlencode(data))
print f.read()
Maybe slightly simpler:
import oauth2client.service_account
jsonfile = 'GooglePM-9f7sdf342f87-service.json'
# use static method .from_json_keyfile_name(filename)
credentials = oauth2client.service_account.ServiceAccountCredentials.from_json_keyfile_name(jsonfile)
Your first algorithm is completely ok and working, but you need SHA256, not SHA. Thank you for your code.
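That is, the only change needed in the original snippet is the hash import; everything else can stay the same:
import Crypto.Hash.SHA256 as SHA  # instead of Crypto.Hash.SHA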
The initial code also works with Python 3.6; you only have to pass bytes instead of str to base64:
import requests
import json as js
import Crypto.PublicKey.RSA as RSA
import Crypto.Hash.SHA256 as SHA
import Crypto.Signature.PKCS1_v1_5 as PKCS1_v1_5
import base64
import time
# Settings
json_key_file = 'google-api.json'
# Load the private key associated with the Google service account
with open(json_key_file) as json_file:
json_data = js.load(json_file)
key = RSA.importKey(json_data['private_key'])
# Create an PKCS1_v1_5 object
signer = PKCS1_v1_5.new(key)
header = js.dumps({'alg':'RS256','typ':'JWT'})
# Encode the JWT header
header_b64 = base64.urlsafe_b64encode(header.encode("UTF-8"))
# JWT claims
jwt = {
'iss': json_data['client_email'],
'scope': 'https://www.googleapis.com/auth/analytics.readonly',
'aud': 'https://accounts.google.com/o/oauth2/token',
'exp': int(time.time())+3600,
'iat': int(time.time())
}
jwt_json = js.dumps(jwt)
# Encode the JWT claims
jwt_json_b64 = base64.urlsafe_b64encode(jwt_json.encode("UTF-8"))
# Sign the JWT header and claims
msg_hash = SHA.new((header_b64.decode("UTF-8") + "." + jwt_json_b64.decode("UTF-8")).encode("UTF-8"))
signature_b64 = base64.urlsafe_b64encode(signer.sign(msg_hash))
# Make the complete message
jwt_complete = (header_b64.decode("UTF-8") + "." + jwt_json_b64.decode("UTF-8") + "." + signature_b64.decode("UTF-8")).encode("UTF-8")
data = {'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
'assertion': jwt_complete}
print(requests.post(url="https://accounts.google.com/o/oauth2/token", data=data).text)