I'm getting the following error:
googleapiclient.errors.HttpError: <HttpError 403 when requesting https://www.googleapis.com/drive/v3/files/XXXXXXXXXXXX?alt=media returned "The user has not granted the app 123456789 read access to the file XXXXXXXXXXXX.">
This is rather strange, as I have authenticated as myself using the OAuth web-authentication method. I can list my files too.
Google lists this error here: https://developers.google.com/drive/v3/web/handle-errors#403_the_user_has_not_granted_the_app_appid_verb_access_to_the_file_fileid and suggests that I prompt the user to open the file. Well, the user is me and I have opened this file about 20 times perfectly fine. The error doesn't go away...
Here is my complete example:
import os
import httplib2
import io
from oauth2client.file import Storage
from apiclient.discovery import build
from oauth2client.client import OAuth2WebServerFlow
from googleapiclient.http import MediaIoBaseDownload
location import Location
CLIENT_ID = 'CLIENT ID XYXXYX'
CLIENT_SECRET = 'YYYYYYYYYYYYYYY'
OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive'
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'
OUT_PATH = Location.folder_path + "media/dumps/"
CREDS_FILE ='this_file_is_somewhere.json'
storage = Storage(CREDS_FILE)
credentials = storage.get()
if credentials is None:
# Run through the OAuth flow and retrieve credentials
flow = OAuth2WebServerFlow(CLIENT_ID, CLIENT_SECRET, OAUTH_SCOPE, REDIRECT_URI)
authorize_url = flow.step1_get_authorize_url()
print('Go to the following link in your browser: ' + authorize_url)
code = raw_input('Enter verification code: ').strip()
credentials = flow.step2_exchange(code)
storage.put(credentials)
# Create an httplib2.Http object and authorize it with our credentials
http = httplib2.Http()
http = credentials.authorize(http)
drive_service = build('drive', 'v3', http=http)
def list_files(service):
page_token = None
while True:
param = {}
if page_token:
param['pageToken'] = page_token
files = service.files().list(**param).execute()
for item in files['items']:
yield item
page_token = files.get('nextPageToken')
if not page_token:
break
def download(file_id, path=OUT_PATH):
request = drive_service.files().get_media(fileId=file_id)
file_meta = drive_service.files().get(fileId=file_id).execute()
name = file_to_get['name']
print(file_meta)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print(int(status.progress() * 100))
f = open(path + '/' + name, 'wb')
f.write(fh.getvalue())
print('File downloaded at', path)
f.close()
download('XXXXXXXXXXXX')
All thanks to #Tanaike:
Simply removing the credential file and recreating it solved that problem.
Related
In the code below I get the fileID of a csv file on Google Drive. Now, I want to store the file content directly in a pandas frame instead of downloading the csv file and afterwards extracting the data (as shown in the code).
import io
import os.path
import pandas as pd
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
# If modifying these scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
# Login to Google Drive
def login():
creds = None
# The file token.json stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.json'):
creds = Credentials.from_authorized_user_file('token.json', SCOPES)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
print ("Login to your to your Google Drive account which holds/shares the file database")
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
'./src/credentials.json', SCOPES)
creds = flow.run_local_server(port=0)
# Save the credentials for the next run
with open('token.json', 'w') as token:
token.write(creds.to_json())
# Return service
service = build('drive', 'v3', credentials=creds)
return service
# Download files from Google Drive
def downloadFile(file_name):
# Authenticate
service = login()
# Search file by name
response = service.files().list(q=f"name='{file_name}'", spaces='drive', fields='nextPageToken, files(id, name)').execute()
for file in response.get('files', []):
file_id = file.get('id')
# Download file file if it exists
if ("file_id" in locals()):
request = service.files().get_media(fileId=file_id)
fh = io.FileIO(f"./data/{file_name}.csv", "wb")
downloader = MediaIoBaseDownload(fh, request)
print (f"Downloading {file_name}.csv")
else:
print (f"\033[1;31m Warning: Can't download >> {file_name} << because it is missing!!!\033[0;0m")
return
downloadFile("NameOfFile")
Is there any way to achieve this?
Thanks a lot for your help
From The problem is to be able to do that I need the file's URL but I'm not able to retrieve it., I thought that your file might be Google Spreadsheet. When the file is Google Spreadsheet, webContentLink is not included in the retrieved metadata.
If my understanding of your situation is correct, how about the following modification?
Modified script:
From:
file_id = file.get('id')
# !!! Here, I would like to get the URL of the file and download it to a pandas data frame !!!
file_url = file.get("webContentLink")
To:
file_id = file.get('id')
file_url = file.get("webContentLink")
if not file_url:
request = service.files().export_media(fileId=file_id, mimeType='text/csv')
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print("Download %d%%" % int(status.progress() * 100))
fh.seek(0)
df = pd.read_csv(fh)
print(df)
In this modification, the Google Spreadsheet is exported as the CSV data using Drive API, and the exported data is put to the dataframe.
In this modification, please add import io and from googleapiclient.http import MediaIoBaseDownload.
Note:
In this case, the Google Spreadsheet is exported as the CSV data using Drive API. So please include the scope of https://www.googleapis.com/auth/drive.readonly or https://www.googleapis.com/auth/drive. When your scope is only https://www.googleapis.com/auth/drive.metadata.readonly, an error occurs. Please be careful this.
Reference:
Files: export
Added:
When the file is the CSV data, please modify as follows.
file_id = file.get('id')
request = service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print("Download %d%%" % int(status.progress() * 100))
fh.seek(0)
df = pd.read_csv(fh)
print(df)
I have this piece of code to extract some metrics about my YouTube channel and create a pandas dataframe from them.
import os
import google.oauth2.credentials
import google_auth_oauthlib.flow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
import json
SCOPES = ['https://www.googleapis.com/auth/yt-analytics.readonly']
API_SERVICE_NAME = 'youtubeAnalytics'
API_VERSION = 'v2'
CLIENT_SECRETS_FILE = 'client_secrets.json'
def get_service():
flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRETS_FILE, SCOPES)
credentials = flow.run_console()
return build(API_SERVICE_NAME, API_VERSION, credentials = credentials)
def execute_api_request(client_library_function, **kwargs):
response = client_library_function(
**kwargs
).execute()
with open('data.json', 'w') as fp:
json.dump(response, fp)
if __name__ == '__main__':
# Disable OAuthlib's HTTPs verification when running locally.
# *DO NOT* leave this option enabled when running in production.
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
youtubeAnalytics = get_service()
execute_api_request(
youtubeAnalytics.reports().query,
ids='channel==MINE',
startDate='2014-01-01',
endDate='2019-02-26',
metrics='averageViewDuration,views,likes,dislikes,subscribersGained,subscribersLost',
dimensions='day',
sort='day',
filters = 'country==US'
)
## Now, convert the json to dataframe
import json
import pandas as pd
with open('data.json') as json_data:
d = json.load(json_data)
colnames = [d['columnHeaders'][i]['name'] for i in range(0,len(d['columnHeaders']))]
Results = pd.DataFrame(d['rows'],columns = colnames)
Results.to_csv("Youtube_data.csv")
By running this code, a windows opens and asks me to login into youtube and then provide me the authorization code. Entering this authorization code finishes the running of above python program. However, you should repeat this authorization process each time you are running this program.
Is there anyway to bypass this repeated authorization such that this process can be automated?
You need to use the oauth2client.file.Storage class to store and retrieves the credentials object as (badly) explained here: https://developers.google.com/api-client-library/python/guide/aaa_oauth
You will need to modify your get_service function with something like this:
from oauth2client import client, file
def get_service():
flow = client.flow_from_clientsecrets(CLIENT_SECRETS_FILE, SCOPES)
storage = file.Storage(API_SERVICE_NAME + '.dat')
credentials = storage.get()
http = credentials.authorize(http=httplib2.Http())
service = build(API_SERVICE_NAME, API_VERSION, http=http)
return service
Hope this helps
I am trying to write a simple script to get a list of my Google Apps users using Google's python API. So far it looks like this (based on a Google example):
!/usr/bin/python
import httplib2
from apiclient import errors
from apiclient.discovery import build
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.client import SignedJwtAssertionCredentials
client_email = 'service_account_email#developer.gserviceaccount.com'
with open("Python GAPS-98dfb88b4c9f.p12") as f:
private_key = f.read()
OAUTH_SCOPE = 'https://www.googleapis.com/auth/admin.directory.user'
credentials = SignedJwtAssertionCredentials(client_email, private_key, OAUTH_SCOPE )
http = httplib2.Http()
http = credentials.authorize(http)
directory_service = build('admin', 'directory_v1', http=http)
all_users = []
page_token = None
params = {'customer': 'my_customer'}
while True:
try:
if page_token:
param['pageToken'] = page_token
current_page = directory_service.users().list(**params).execute()
all_users.extend(current_page['users'])
page_token = current_page.get('nextPageToken')
if not page_token:
break
except errors.HttpError as error:
print 'An error occurred: %s' % error
break
for user in all_users:
print user['primaryEmail']
The service account has been authorized on google developer console for the following API's:
https://www.googleapis.com/auth/admin.directory.user
https://www.googleapis.com/auth/admin.directory.user.alias
However, when I run the code, I get this error:
An error occurred: <HttpError 404 when requesting https://www.googleapis.com/admin/directory/v1/users?customer=my_customer&alt=json returned "Resource Not Found: domain">
Any hints on what am I missing?
E.
Even when using a service account, you still need to "act as" a Google Apps user in the domain with the proper rights (e.g. a super admin). Try:
credentials = SignedJwtAssertionCredentials(client_email, private_key,
OAUTH_SCOPE, sub='admin#domain.com')
where admin#domain.com is the email of a super admin in your domain.
I have to split a PDF on drive. So i want to know if there are a way to manipulate PDF on Drive API.
Does anyone know a way to make at least one of these actions
Split
get number of page
cut page
...
Here is a solution to display the number of pages of a PDF file in Drive, split it into separate PDFs for each page and insert the newly created PDFs back into Drive.
To execute the following code you will need to define a project in the Google Developer Console. You can create a new one at https://console.developers.google.com/project if you do not already have one.
Once your project is created, click on it to open the Project Dashboard. Go to APIS & Auth > Credentials and create a new OAuth Client ID for an installed application if you do not already have one for this project. Replace client_id, client_secret and redirect_uri in the code below with respectively the Client ID, the Client Secret and the first redirect URI listed.
The program will first open a page in your web browser to obtain a verification code required to create a new OAuth token. It will then ask for the fileId of a PDF file in your drive, will display the number of pages of this PDF and insert each page as a separate PDF back in your drive.
from cStringIO import StringIO
import os
import webbrowser
from apiclient.discovery import build
from apiclient.http import MediaInMemoryUpload
import httplib2
from oauth2client.client import OAuth2WebServerFlow
import pyPdf
CLIENT_ID = 'client_id'
CLIENT_SECRET = 'client_secret'
OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive'
REDIRECT_URI = 'redirect_url'
class GoogleDriveManager(object):
def __init__(self):
# Create new Google Drive credentials.
flow = OAuth2WebServerFlow(
CLIENT_ID, CLIENT_SECRET, OAUTH_SCOPE, REDIRECT_URI)
authorize_url = flow.step1_get_authorize_url()
webbrowser.open(authorize_url)
code = raw_input('Enter verification code: ').strip()
self._credentials = flow.step2_exchange(code)
def GetFile(self, file_id):
http = httplib2.Http()
http = self._credentials.authorize(http)
drive_service = build('drive', 'v2', http=http)
url = drive_service.files().get(fileId=file_id).execute()['downloadUrl']
return http.request(url, "GET")[1]
def GetFileName(self, file_id):
http = httplib2.Http()
http = self._credentials.authorize(http)
drive_service = build('drive', 'v2', http=http)
return drive_service.files().get(fileId=file_id).execute()['title']
def InsertFile(self, file_name, data, mimeType):
http = httplib2.Http()
http = self._credentials.authorize(http)
drive_service = build('drive', 'v2', http=http)
media_body = MediaInMemoryUpload(
data, mimetype='text/plain', resumable=True)
body = {
'title': file_name,
'mimeType': mimeType
}
drive_service.files().insert(body=body, media_body=media_body).execute()
if __name__ == '__main__':
# Create a drive manager.
drive_manager = GoogleDriveManager()
file_id = raw_input('Enter the file id of the pdf file: ').strip()
file_name, ext = os.path.splitext(drive_manager.GetFileName(file_id))
# Download the pdf file.
pdf_data = drive_manager.GetFile(file_id)
pdf = pyPdf.PdfFileReader(StringIO(pdf_data))
print "Number of pages: %d" % pdf.getNumPages()
for i in xrange(pdf.getNumPages()):
writer = pyPdf.PdfFileWriter()
writer.addPage(pdf.getPage(i))
page_data = StringIO()
writer.write(page_data)
drive_manager.InsertFile(
file_name + '-' + str(i) + ext, page_data.getvalue(), 'application/pdf')
I need to get OAuth2 'flow' for GoogDRIVE working.
Previously I have gotten the success token thing to work fine, but I need to have it set up so I don't need to get the token each time, just once would be fine.
Here is where I'm at, based on: https://developers.google.com/api-client-library/python/guide/aaa_oauth
import httplib2
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.file import Storage
from apiclient.discovery import build
flow = OAuth2WebServerFlow(client_id='107......539.apps.googleusercontent.com',
client_secret='dVvq2itsasecretbxzG',
scope='https://www.googleapis.com/auth/drive',
redirect_uri='urn:ietf:wg:oauth:2.0:oob')
#retrieve if available
storage = Storage('OAuthcredentials.txt')
credentials = storage.get()
if credentials is None:
#step 1
auth_uri = flow.step1_get_authorize_url() # Redirect the user to auth_uri
print 'Go to the following link in your browser: ' + auth_uri
code = raw_input('Enter verification code: ').strip()
else:
code = credentials #yeah i know this bit is wrong, but pls send HELP!
#step 2
credentials = flow.step2_exchange(code)
#authorise
http = httplib2.Http()
http = credentials.authorize(http)
print 'authorisation completed'
#build
service = build('drive', 'v2', http=http)
#store for next time
storage.put(credentials)
Its for a stand-alone app. so firstly am I going about this the right way?
And if so, how do I enter the credentials as the code in the above else?
Yep it was a simple mistake. Here is the working snip to replace the bits in the Q above:
......
if credentials is None:
#step 1
auth_uri = flow.step1_get_authorize_url() # Redirect the user to auth_uri
print 'Go to the following link in your browser: ' + auth_uri
code = raw_input('Enter verification code: ').strip()
#step 2
credentials = flow.step2_exchange(code)
else:
print 'GDrive credentials are still current'
#authorise
http = httplib2.Http()
http = credentials.authorize(http)
print 'Authorisation successfully completed'
......etc
cheers