How to manipulate PDF in Google Drive API using Python - python

I have to split a PDF stored on Drive, so I want to know if there is a way to manipulate PDFs through the Drive API.
Does anyone know a way to make at least one of these actions
Split
get number of page
cut page
...

Here is a solution to display the number of pages of a PDF file in Drive, split it into separate PDFs for each page and insert the newly created PDFs back into Drive.
To execute the following code you will need to define a project in the Google Developer Console. You can create a new one at https://console.developers.google.com/project if you do not already have one.
Once your project is created, click on it to open the Project Dashboard. Go to APIS & Auth > Credentials and create a new OAuth Client ID for an installed application if you do not already have one for this project. Replace client_id, client_secret and redirect_uri in the code below with respectively the Client ID, the Client Secret and the first redirect URI listed.
The program will first open a page in your web browser to obtain a verification code required to create a new OAuth token. It will then ask for the fileId of a PDF file in your drive, will display the number of pages of this PDF and insert each page as a separate PDF back in your drive.
from cStringIO import StringIO
import os
import webbrowser
from apiclient.discovery import build
from apiclient.http import MediaInMemoryUpload
import httplib2
from oauth2client.client import OAuth2WebServerFlow
import pyPdf
CLIENT_ID = 'client_id'
CLIENT_SECRET = 'client_secret'
OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive'
REDIRECT_URI = 'redirect_url'
class GoogleDriveManager(object):
    """Small helper around the Drive v2 API using interactively obtained OAuth credentials.

    Creating an instance opens the authorisation page in the web browser and
    reads the verification code from standard input; the resulting credentials
    are reused by every method.
    """

    def __init__(self):
        # Create new Google Drive credentials.
        flow = OAuth2WebServerFlow(
            CLIENT_ID, CLIENT_SECRET, OAUTH_SCOPE, REDIRECT_URI)
        authorize_url = flow.step1_get_authorize_url()
        webbrowser.open(authorize_url)
        code = raw_input('Enter verification code: ').strip()
        self._credentials = flow.step2_exchange(code)

    def _BuildService(self):
        """Return an (authorised http object, Drive v2 service) pair."""
        http = self._credentials.authorize(httplib2.Http())
        return http, build('drive', 'v2', http=http)

    def GetFile(self, file_id):
        """Return the content of the file `file_id`.

        The file's metadata is fetched first and the body is then requested
        from its 'downloadUrl' with the same authorised http object.
        """
        http, drive_service = self._BuildService()
        url = drive_service.files().get(fileId=file_id).execute()['downloadUrl']
        # http.request() returns (response, content); only the content is needed.
        return http.request(url, "GET")[1]

    def GetFileName(self, file_id):
        """Return the 'title' field of the file `file_id`."""
        _, drive_service = self._BuildService()
        return drive_service.files().get(fileId=file_id).execute()['title']

    def InsertFile(self, file_name, data, mimeType):
        """Insert `data` into Drive as a new file titled `file_name`.

        Args:
            file_name: title of the new file.
            data: content of the new file, held in memory.
            mimeType: MIME type used for both the metadata and the uploaded media.
        """
        _, drive_service = self._BuildService()
        # The media body must carry the caller's MIME type; it used to be
        # hard-coded to 'text/plain', which disagreed with the metadata.
        media_body = MediaInMemoryUpload(
            data, mimetype=mimeType, resumable=True)
        body = {
            'title': file_name,
            'mimeType': mimeType
        }
        drive_service.files().insert(body=body, media_body=media_body).execute()
if __name__ == '__main__':
# Create a drive manager.
drive_manager = GoogleDriveManager()
file_id = raw_input('Enter the file id of the pdf file: ').strip()
file_name, ext = os.path.splitext(drive_manager.GetFileName(file_id))
# Download the pdf file.
pdf_data = drive_manager.GetFile(file_id)
pdf = pyPdf.PdfFileReader(StringIO(pdf_data))
print "Number of pages: %d" % pdf.getNumPages()
for i in xrange(pdf.getNumPages()):
writer = pyPdf.PdfFileWriter()
writer.addPage(pdf.getPage(i))
page_data = StringIO()
writer.write(page_data)
drive_manager.InsertFile(
file_name + '-' + str(i) + ext, page_data.getvalue(), 'application/pdf')

Related

How do I do a resumable upload to a shared folder in Python using Google Drive API v3?

I'm trying to upload files to Google Drive using the Google Drive API v3 and Python 3.10.4. I'm attempting to do a resumable upload. I successfully get the URI location of where to start uploading to but when I attempt to upload the file to the location I get a 404 back from Google.
reason: 'notFound',
message: 'File not found: #FileIdHere',
locationType: 'parameter',
location: 'fileId'
Google gives me the fileId of the folder I'm attempting to upload to. Its the same id I get from my get_folderId function.
def backup(drive: Drive, cam_name: str):
    """Upload every file found on a plugged-in drive to a Google Drive folder.

    Walks ``<drive.letter>\\FILE\\`` (or ``<drive.letter>\\DCIM`` when the first
    directory does not exist) and, for each file, opens a resumable upload
    session and sends the file content in a single PUT request.

    Args:
        drive: object exposing the drive letter as ``drive.letter``.
        cam_name: name handed to get_folderId() to find the destination folder.
    """
    logger.info('Backup drive has been plugged in')
    logger.info(f'Backing up {drive.letter}')

    # Grab the folder ID of the folder we are uploading files to
    folderId = get_folderId(cam_name)
    access_token = get_access_token()

    rootDirectory = drive.letter + "\\FILE\\"
    if not os.path.isdir(rootDirectory):
        rootDirectory = drive.letter + "\\DCIM"

    # One MIME detector serves the whole walk; it does not depend on the file.
    mime = magic.Magic(mime=True)

    for root, dirs, files in os.walk(rootDirectory):
        for fileName in files:
            filePath = os.path.join(root, fileName)
            mimeType = mime.from_file(filePath)
            fileSize = os.path.getsize(filePath)

            # Retrieve session for resumable upload
            headers = {"Authorization": "Bearer "+access_token, "Content-Type": "application/json"}
            parameters = {
                "name": fileName,
                "mimeType": mimeType,
                "parents" : [folderId]
            }
            response = requests.post(
                "https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable",
                headers=headers,
                data=json.dumps(parameters)
            )

            # Location to upload file
            uploadLocation = response.headers['Location']

            # Upload the file; the with-block closes the handle once the
            # request has finished (it used to be left open).
            headers = {"Content-Range": "bytes 0-" + str(fileSize - 1) + "/" + str(fileSize)}
            with open(filePath, 'rb') as fileData:
                response = requests.put(
                    uploadLocation,
                    headers = headers,
                    data=fileData
                )
            logger.info(response.text)
In the parameters variable in the above function, I've tried adding supportsAllDrives = True, to no avail, so I'm not sure why it's not able to find the folder.
#Gets the folder ID of the provided folder name from Google Drive
def get_folderId(cam_name: str):
    """Return the ID of the first Drive folder whose name equals ``cam_name``.

    Builds a service with get_google_service(), lists non-trashed folders with
    that name inside a single shared drive (corpora='drive') and returns the
    'id' of the first entry of the result.

    NOTE(review): the driveId argument is a redacted placeholder, so the call
    does not parse as written; put the real shared-drive ID there.
    NOTE(review): only the first result is used and an empty result list makes
    the [0] lookup fail.
    """
    service = get_google_service()
    page_token = None
    # Folders are files with the special folder MIME type.
    queryString = "mimeType='application/vnd.google-apps.folder' and name='{}' and trashed = false".format(cam_name)
    response = service.files().list(q = queryString,
                                    spaces='drive',
                                    corpora='drive',
                                    supportsAllDrives=True,
                                    includeItemsFromAllDrives=True,
                                    driveId= #DriveIdGoesHere,
                                    fields='nextPageToken, files(id, name)',
                                    pageToken=page_token).execute()
    folderId = response.get('files')[0].get('id')
    return folderId
#Gets the Google service
def get_google_service():
    """Build and return a Drive v3 service from a service-account key file.

    Credentials are loaded from 'service.json' with the full Drive scope and
    then narrowed with with_subject() before the service is built.

    NOTE(review): the with_subject() argument is a redacted placeholder
    (presumably the e-mail address of the user to act as); fill it in before
    running, the line does not parse as written.
    """
    SCOPES = ['https://www.googleapis.com/auth/drive']
    SERVICE_ACCOUNT_FILE = 'service.json'
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)
    creds = credentials.with_subject(#EmailGoesHere)
    service = build('drive', 'v3', credentials=creds)
    return service
I believe your goal is as follows.
From How do I do a resumable upload to a shared folder in Python using Google Drive API v3?, I thought that you might have wanted to upload a file to a shared folder in Google Drive.
From your showing script, it seems that you are using the service account.
But, from Google gives me the fileId of the folder I'm attempting to upload to. Its the same id I get from my get_folderId function., in your situation, I thought that you might have wanted to upload a file to a folder in a shared Drive.
If my understanding is correct, from your error message of reason: 'notFound', message: 'File not found: #FileIdHere', locationType: 'parameter', location: 'fileId', how about the following modification?
From:
response = requests.post(
"https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable",
headers=headers,
data=json.dumps(parameters)
)
To:
response = requests.post(
"https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable&supportsAllDrives=true",
headers=headers,
data=json.dumps(parameters)
)
When you want to upload a file to a folder in a shared Drive and the value of folderId is the folder ID of the folder in a shared Drive, it is required to include supportsAllDrives=true to the query parameter.
Note:
When I tested your script using your endpoint of https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable, I confirmed that the same "File not found:" error occurs.
When I tested your script using my modified endpoint of https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable&supportsAllDrives=true, I confirmed that the file could be correctly uploaded.
But, if your service account has no write permission to the shared Drive, an error occurs. Please be careful about this.
Reference
Files: create
I hope this snippet helps
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from openpyxl import load_workbook
from openpyxl.drawing.image import Image as pyImage
from PIL import Image
import progressbar
import requests
def upload_as_csv(df, ver):
    """Write ``df`` to output/doc-<ver>.csv and upload that file to Drive.

    Prints the ID of the uploaded file and the version number, then returns
    True.
    """
    csv_name = "doc-{fver}.csv".format(fver=ver)
    local_csv = 'output/{ftit}'.format(ftit=csv_name)
    df.to_csv(local_csv)

    # Upload the file that was just written, keeping its name as the title.
    drive = GoogleDrive(GoogleAuth())
    remote_file = drive.CreateFile({'title': csv_name})
    remote_file.SetContentFile(local_csv)
    remote_file.Upload()

    print('Uploaded file with ID {}'.format(remote_file.get('id')))
    print('Version number {}'.format(ver))
    return True

How to download a file from Google Drive using Python and the Drive API v3

I have tried downloading file from Google Drive to my local system using python script but facing a "forbidden" issue while running a Python script. The script is as follows:
import requests
url = "https://www.googleapis.com/drive/v3/files/1wPxpQwvEEOu9whmVVJA9PzGPM2XvZvhj?alt=media&export=download"
querystring = {"alt":"media","export":"download"}
headers = {
'Authorization': "Bearer TOKEN",
'Host': "www.googleapis.com",
'Accept-Encoding': "gzip, deflate",
'Connection': "keep-alive",
}
response = requests.request("GET", url, headers=headers, params=querystring)
print(response.url)
#
import wget
import os
from os.path import expanduser
myhome = expanduser("/home/sunarcgautam/Music")
### set working dir
os.chdir(myhome)
url = "https://www.googleapis.com/drive/v3/files/1wPxpQwvEEOu9whmVVJA9PzGPM2XvZvhj?alt=media&export=download"
print('downloading ...')
wget.download(response.url)
In this script, I have got forbidden issue. Am I doing anything wrong in the script?
I have also tried another script that I found on a Google Developer page, which is as follows:
import auth
import httplib2
SCOPES = "https://www.googleapis.com/auth/drive.scripts"
CLIENT_SECRET_FILE = "client_secret.json"
APPLICATION_NAME = "test_Download"
authInst = auth.auth(SCOPES, CLIENT_SECRET_FILE, APPLICATION_NAME)
credentials = authInst.getCredentials()
http = credentials.authorize(httplib2.Http())
drive_serivce = discovery.build('drive', 'v3', http=http)
file_id = '1Af6vN0uXj8_qgqac6f23QSAiKYCTu9cA'
request = drive_serivce.files().export_media(fileId=file_id,
mimeType='application/pdf')
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print ("Download %d%%." % int(status.progress() * 100))
This script gives me a URL mismatch error.
So what should be given for redirect URL in Google console credentials? or any other solution for the issue? Do I have to authorise my Google console app from Google in both the script? If so, what will the process of authorising the app because I haven't found any document regarding that.
To make requests to Google APIs the work flow is in essence the following:
Go to developer console, log in if you haven't.
Create a Cloud Platform project.
Enable for your project, the APIs you are interested in using with you projects' apps (for example: Google Drive API).
Create and download OAuth 2.0 Client IDs credentials that will allow your app to gain authorization for using your enabled APIs.
Head over to OAuth consent screen, click on and add your scope using the button. (scope: https://www.googleapis.com/auth/drive.readonly for you). Choose Internal/External according to your needs, and for now ignore the warnings if any.
To get the valid token for making API request the app will go through the OAuth flow to receive the authorization token. (Since it needs consent)
During the OAuth flow the user will be redirected to the OAuth consent screen, where they will be asked to approve or deny access to your app's requested scopes.
If consent is given, your app will receive an authorization token.
Pass the token in your request to your authorized API endpoints.[2]
Build a Drive Service to make API requests (You will need the valid token)[1]
NOTE:
The available methods for the Files resource for Drive API v3 are here.
When using the Python Google APIs Client, then you can use export_media() or get_media() as per Google APIs Client for Python documentation
IMPORTANT:
Also, check that the scope you are using, actually allows you to do what you want (Downloading Files from user's Drive) and set it accordingly. ATM you have an incorrect scope for your goal. See OAuth 2.0 API Scopes
Sample Code References:
Building a Drive Service:
import google_auth_oauthlib.flow
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
class Auth:
    """Holds a client-secrets file name and scopes and obtains user credentials."""

    def __init__(self, client_secret_filename, scopes):
        """Remember the inputs and prepare a flow redirecting to localhost:8080."""
        self.client_secret = client_secret_filename
        self.scopes = scopes
        web_flow = google_auth_oauthlib.flow.Flow.from_client_secrets_file(
            self.client_secret, self.scopes)
        self.flow = web_flow
        self.flow.redirect_uri = 'http://localhost:8080/'
        # Filled in by get_credentials().
        self.creds = None

    def get_credentials(self):
        """Run the installed-app flow on a local server (port 8080) and return the credentials."""
        installed_flow = InstalledAppFlow.from_client_secrets_file(
            self.client_secret, self.scopes)
        self.creds = installed_flow.run_local_server(port=8080)
        return self.creds
# The scope you app will use.
# (NEEDS to be among the enabled in your OAuth consent screen)
SCOPES = "https://www.googleapis.com/auth/drive.readonly"
CLIENT_SECRET_FILE = "credentials.json"
credentials = Auth(client_secret_filename=CLIENT_SECRET_FILE, scopes=SCOPES).get_credentials()
drive_service = build('drive', 'v3', credentials=credentials)
Making the request to export or get a file
# Names this snippet needs in addition to the service built above.
import io
import shutil

from googleapiclient.http import MediaIoBaseDownload

# file_id must hold the ID of the file to export; it is not set in this snippet.
# export_media() (or get_media() for files that need no conversion) is the
# request type meant for chunked download through MediaIoBaseDownload.
request = drive_service.files().export_media(fileId=file_id, mimeType='application/pdf')
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print("Download %d%%" % int(status.progress() * 100))

# The file has been downloaded into RAM, now save it in a file
fh.seek(0)
with open('your_filename.pdf', 'wb') as f:
    shutil.copyfileobj(fh, f, length=131072)

No permission to download my file from Google Drive

I'm getting the following error:
googleapiclient.errors.HttpError: <HttpError 403 when requesting https://www.googleapis.com/drive/v3/files/XXXXXXXXXXXX?alt=media returned "The user has not granted the app 123456789 read access to the file XXXXXXXXXXXX.">
This is rather strange, as I have authenticated as myself using the OAuth web-authentication method. I can list my files too.
Google lists this error here: https://developers.google.com/drive/v3/web/handle-errors#403_the_user_has_not_granted_the_app_appid_verb_access_to_the_file_fileid and suggests that I prompt the user to open the file. Well, the user is me and I have opened this file about 20 times perfectly fine. The error doesn't go away...
Here is my complete example:
import os
import httplib2
import io
from oauth2client.file import Storage
from apiclient.discovery import build
from oauth2client.client import OAuth2WebServerFlow
from googleapiclient.http import MediaIoBaseDownload
location import Location
CLIENT_ID = 'CLIENT ID XYXXYX'
CLIENT_SECRET = 'YYYYYYYYYYYYYYY'
OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive'
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'
OUT_PATH = Location.folder_path + "media/dumps/"
CREDS_FILE ='this_file_is_somewhere.json'
storage = Storage(CREDS_FILE)
credentials = storage.get()
if credentials is None:
# Run through the OAuth flow and retrieve credentials
flow = OAuth2WebServerFlow(CLIENT_ID, CLIENT_SECRET, OAUTH_SCOPE, REDIRECT_URI)
authorize_url = flow.step1_get_authorize_url()
print('Go to the following link in your browser: ' + authorize_url)
code = raw_input('Enter verification code: ').strip()
credentials = flow.step2_exchange(code)
storage.put(credentials)
# Create an httplib2.Http object and authorize it with our credentials
http = httplib2.Http()
http = credentials.authorize(http)
drive_service = build('drive', 'v3', http=http)
def list_files(service):
    """Yield every file resource returned by ``service.files().list()``.

    Pages are requested one after another, passing the previous page's
    'nextPageToken' as 'pageToken', until a page carries no token.

    Args:
        service: a Drive service object exposing ``files().list(**params).execute()``.

    Yields:
        One dict per file, in the order the API returns them.
    """
    page_token = None
    while True:
        param = {}
        if page_token:
            param['pageToken'] = page_token
        files = service.files().list(**param).execute()
        # Drive v3 returns the page under 'files'; v2 used 'items'. Accept
        # both so the generator works with either service version.
        for item in files.get('files', files.get('items', [])):
            yield item
        page_token = files.get('nextPageToken')
        if not page_token:
            break
def download(file_id, path=OUT_PATH):
    """Download the Drive file ``file_id`` into the directory ``path``.

    The file's metadata is fetched to learn its name, the content is streamed
    into memory chunk by chunk (printing the progress percentage), and the
    bytes are then written to ``<path>/<name>``.

    Args:
        file_id: ID of the Drive file to download.
        path: destination directory; defaults to OUT_PATH.
    """
    request = drive_service.files().get_media(fileId=file_id)
    file_meta = drive_service.files().get(fileId=file_id).execute()
    # The metadata dict is file_meta; the previous name file_to_get was never defined.
    name = file_meta['name']
    print(file_meta)

    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print(int(status.progress() * 100))

    # os.path.join avoids a doubled separator when path already ends with one,
    # and the with-block closes the file even if the write fails.
    with open(os.path.join(path, name), 'wb') as f:
        f.write(fh.getvalue())
    print('File downloaded at', path)
download('XXXXXXXXXXXX')
All thanks to #Tanaike:
Simply removing the credential file and recreating it solved that problem.

windows7 python36: how send to gdrive using righ click context menu?

Where is the code (and explanations) needed to send a file using its path, or to send a file by right-clicking on it (working on Windows 7 with Python 3.6)?
I struggled with the Google tutorial. Here is the code (and explanations) needed to send a file using its path, or to send a file by right-clicking on it (working on Windows 7 with Python 3.6).
## windows7 python36: send to gdrive using righ click context menu
import logging
import httplib2
import os #to get files
from os.path import basename #get file name
import sys #to get path
import ctypes # alert box (inbuilt)
import time #date for filename
import oauth2client
from oauth2client import client, tools
# google apli drive
from oauth2client.service_account import ServiceAccountCredentials
from apiclient.discovery import build
from apiclient.http import MediaFileUpload
#needed for gmail service
from apiclient import discovery
import mimetypes #to guess the mime types of the file to upload
import HtmlClipboard #custom modul which recognized html and copy it in html_clipboard
## About credentials
# There are 2 types of "credentials":
# the one created and downloaded from https://console.developers.google.com/apis/ (let's call it the client_id)
# the one that will be created from the downloaded client_id (let's call it credentials, it will be store in C:\Users\user\.credentials)
#Getting the CLIENT_ID
# 1) enable the api you need on https://console.developers.google.com/apis/
# 2) download the .json file (this is the CLIENT_ID)
# 3) save the CLIENT_ID in same folder as your script.py
# 4) update the CLIENT_SECRET_FILE (in the code below) with the CLIENT_ID filename
#Optional
# If you don't change the permission ("scope"):
#the CLIENT_ID could be deleted after creating the credential (after the first run)
# If you need to change the scope:
# you will need the CLIENT_ID each time to create a new credential that contains the new scope.
# Set a new credentials_path for the new credential (because it's another file)
## get the credential or create it if doesn't exist
def get_credentials():
    """Return the stored OAuth2 credentials, creating them on first use.

    The credential file lives in a '.credentials' folder inside the user's
    home directory. When it is missing or invalid, the OAuth consent flow is
    run from the client-secret file and the result is saved there.

    Returns:
        The credentials object loaded from the store or produced by the flow.
    """
    # Imported explicitly: `import oauth2client` by itself is not guaranteed
    # to load the `file` submodule that provides Storage.
    from oauth2client.file import Storage

    # If needed create folder for credential
    home_dir = os.path.expanduser('~')  # >> C:\Users\Me
    credential_dir = os.path.join(home_dir, '.credentials')  # >> C:\Users\Me\.credentials (it's a folder)
    os.makedirs(credential_dir, exist_ok=True)
    credential_path = os.path.join(credential_dir, 'name_of_your_json_file.json')

    # Store the credential
    store = Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        CLIENT_SECRET_FILE = 'client_id to send Gmail.json'
        APPLICATION_NAME = 'Gmail API Python Send Email'
        # The credential is used to create files on Google Drive, so it needs a
        # Drive scope (a Gmail send-only scope does not cover Drive uploads).
        # A credential file saved earlier under another scope keeps that scope:
        # delete it or change credential_path to trigger a new consent.
        SCOPES = 'https://www.googleapis.com/auth/drive'
        # Create a flow object. (it assists with OAuth 2.0 steps to get user authorization + credentials)
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        # Hand run_flow default flags explicitly; otherwise it parses
        # sys.argv, which holds the right-clicked file path when the script is
        # started from the "Send to" menu.
        credentials = tools.run_flow(flow, store, tools.argparser.parse_args([]))
    return credentials
## Send a file by providing it's path (for debug)
def upload_myFile_to_Gdrive():  # needed for testing
    """Upload one hard-coded .docx file to a hard-coded Drive folder (debug helper)."""
    # Authorised Drive v3 client.
    authorised_http = get_credentials().authorize(httplib2.Http())
    drive_service = discovery.build('drive', 'v3', http=authorised_http)

    local_path = r'C:\Users\Me\Desktop\myFile.docx'

    # Destination folder
    # (to get the id: open the folder on gdrive and look in the browser URL)
    destination_folder = "0B5qsCtRh5yNoTnVbR3hJUHlKZVU"

    # Metadata: the Drive name is the local file name without its directories.
    metadata = {
        'name': os.path.basename(local_path),
        'parents': [destination_folder],
    }
    # Find the right mime type: http://stackoverflow.com/questions/4212861/what-is-a-correct-mime-type-for-docx-pptx-etc
    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    upload = MediaFileUpload(local_path, mimetype=docx_mime)

    # Send it, asking the API to return only the new file's id.
    create_request = drive_service.files().create(
        body=metadata, media_body=upload, fields='id')
    created = create_request.execute()

    # Print result, then keep the console window open until Enter is pressed.
    print(f"file ID uploaded: {created.get('id')}")
    input("close?")
## Send file selected using the "send to" context menu
def upload_right_clicked_files_to_Gdrive():
    # Upload the file given on the command line (Windows "Send to" menu) to a
    # hard-coded Drive folder, print its id and wait for Enter.

    # Authorised Drive v3 client.
    authorised_http = get_credentials().authorize(httplib2.Http())
    drive_service = discovery.build('drive', 'v3', http=authorised_http)

    ## Launching the script through the context menu "SendTo"
    # (in windows7) Type "shell:SendTo" in the URL of an explorer window. Create a .cmd file containing (remove the first # before each line):
    # ##echo off
    # cls
    # python "The\\path\\of\\your_script.py" "%1"

    ## The path of the right-clicked file is stored in "sys.argv[1:]"
    # print(sys.argv)
    # ['C:\\Users\\my_script.py', 'C:\\Desktop\\my', 'photo.jpg'] # (the path is cut when the file name contains a space)

    ## Rebuild the right-clicked file path:
    # join every argument up to the end (because of the space)
    target_path = ' '.join(sys.argv[1:])  # >> C:\Desktop\my photo.jpg
    drive_name = os.path.basename(target_path)

    ## Guess the content type of the file
    # -----About MimeTypes:
    # It tells gdrive which application it should use to read the file (it acts like an extension for windows). If you don't provide it, you won't be able to read the file on gdrive (it won't be recognized as text, image...). You'll have to download it to read it (it will then be recognized by its extension).
    guessed_type, encoding = mimetypes.guess_type(target_path)
    # guessed_type: ex image/jpeg or text/plain (or None if the extension isn't recognized)
    # if your file isn't recognized you can add it here: C:\Users\Me\AppData\Local\Programs\Python\Python36\Lib\mimetypes.py
    recognised = guessed_type is not None and encoding is None
    # Generic fallback for unrecognized extensions (so the type is never None).
    upload_type = guessed_type if recognised else 'application/octet-stream'

    ## Destination folder:
    # (to get the id: open the folder on gdrive and look in the browser URL)
    destination_folder = "0B5f6Tv7nVYv77BPbVU"

    ## Send the file together with its metadata
    metadata = {'name': drive_name, 'parents': [destination_folder]}
    upload = MediaFileUpload(target_path, mimetype=upload_type)
    create_request = drive_service.files().create(
        body=metadata, media_body=upload, fields='id')
    created = create_request.execute()

    ## Print the uploaded file ID
    uploaded_file_id = created.get('id')
    print(f"file ID: {uploaded_file_id}")
    input("close?")
if __name__ == '__main__':
# upload_right_clicked_files_to_Gdrive()
upload_myFile_to_Gdrive() # needed to debug

Python: download files from google drive using url

I am trying to download files from google drive and all I have is the drive's URL.
I have read about google API that talks about some drive_service and MedioIO, which also requires some credentials( mainly JSON file/OAuth). But I am unable to get any idea about how it is working.
Also, tried urllib2.urlretrieve, but my case is to get files from the drive. Tried wget too but no use.
Tried PyDrive library. It has good upload functions to drive but no download options.
Any help will be appreciated.
Thanks.
If by "drive's url" you mean the shareable link of a file on Google Drive, then the following might help:
import requests
def download_file_from_google_drive(id, destination):
    """Fetch the shared file ``id`` and save it at ``destination``.

    A first request is sent with only the id. If that response yields a
    confirmation token, the request is repeated with the token added, and the
    final response is written to disk.
    """
    endpoint = "https://docs.google.com/uc?export=download"
    http_session = requests.Session()

    reply = http_session.get(endpoint, params={'id': id}, stream=True)
    confirm_token = get_confirm_token(reply)
    if confirm_token:
        # Second attempt, this time carrying the confirmation token.
        confirmed_query = {'id': id, 'confirm': confirm_token}
        reply = http_session.get(endpoint, params=confirmed_query, stream=True)

    save_response_content(reply, destination)
def get_confirm_token(response):
    """Return the value of the first 'download_warning*' cookie, or None if there is none."""
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)
def save_response_content(response, destination):
    """Write the body of ``response`` to the file ``destination`` in 32 KiB chunks."""
    CHUNK_SIZE = 32768
    with open(destination, "wb") as out_file:
        # Empty chunks (keep-alive) are skipped.
        out_file.writelines(
            piece for piece in response.iter_content(CHUNK_SIZE) if piece
        )
if __name__ == "__main__":
file_id = 'TAKE ID FROM SHAREABLE LINK'
destination = 'DESTINATION FILE ON YOUR DISK'
download_file_from_google_drive(file_id, destination)
The snippet does not use pydrive, nor the Google Drive SDK, though. It uses the requests module (which is, in a way, an alternative to urllib2).
When downloading large files from Google Drive, a single GET request is not sufficient. A second one is needed - see wget/curl large file from google drive.
I recommend gdown package.
pip install gdown
Take your share link
https://drive.google.com/file/d/0B9P1L--7Wd2vNm9zMTJWOGxobkU/view?usp=sharing
and grab the id (e.g. 1TLNdIufzwesDbyr_nVTR7Zrx9oRHLM_N; for the link above it is 0B9P1L--7Wd2vNm9zMTJWOGxobkU). You can also find it by pressing the download button and looking at the link. Then swap it in after id= in the URL below.
import gdown
url = 'https://drive.google.com/uc?id=0B9P1L--7Wd2vNm9zMTJWOGxobkU'
output = '20150428_collected_images.tgz'
gdown.download(url, output, quiet=False)
Having had similar needs many times, I made an extra simple class GoogleDriveDownloader starting on the snippet from #user115202 above. You can find the source code here.
You can also install it through pip:
pip install googledrivedownloader
Then usage is as simple as:
from google_drive_downloader import GoogleDriveDownloader as gdd
gdd.download_file_from_google_drive(file_id='1iytA1n2z4go3uVCwE__vIKouTKyIDjEq',
dest_path='./data/mnist.zip',
unzip=True)
This snippet will download an archive shared in Google Drive. In this case 1iytA1n2z4go3uVCwE__vIKouTKyIDjEq is the id of the sharable link got from Google Drive.
Here's an easy way to do it with no third-party libraries and a service account.
pip install google-api-core and google-api-python-client
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google.oauth2 import service_account
import io
credz = {}  # put the JSON credentials here, from a service account or the like
# More info: https://cloud.google.com/docs/authentication
credentials = service_account.Credentials.from_service_account_info(credz)
drive_service = build('drive', 'v3', credentials=credentials)
file_id = '0BwwA4oUTeiV1UVNwOHItT0xfa2M'
request = drive_service.files().get_media(fileId=file_id)
# fh = io.BytesIO()  # this can be used to keep in memory
# Writing to disk: the with-block closes the file when the download is over,
# including when a chunk request fails.
with io.FileIO('file.tar.gz', 'wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
PyDrive allows you to download a file with the function GetContentFile(). You can find the function's documentation here.
See example below:
# Initialize GoogleDriveFile instance with file id.
file_obj = drive.CreateFile({'id': '<your file ID here>'})
file_obj.GetContentFile('cats.png') # Download file as 'cats.png'.
This code assumes that you have an authenticated drive object, the docs on this can be found here and here.
In the general case this is done like so:
from pydrive.auth import GoogleAuth
gauth = GoogleAuth()
# Create local webserver which automatically handles authentication.
gauth.LocalWebserverAuth()
# Create GoogleDrive instance with authenticated GoogleAuth instance.
drive = GoogleDrive(gauth)
Info on silent authentication on a server can be found here and involves writing a settings.yaml (example: here) in which you save the authentication details.
There's in the docs a function that downloads a file when we provide an ID of the file to download,
from __future__ import print_function
import io
import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
def download_file(real_file_id):
    """Downloads a file

    Args:
        real_file_id: ID of the file to download

    Returns:
        The downloaded content as bytes, or None when the Drive API answered
        with an HttpError (the error is printed).

    Load pre-authorized user credentials from the environment.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.
    """
    creds, _ = google.auth.default()

    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)

        file_id = real_file_id

        # pylint: disable=maybe-no-member
        request = service.files().get_media(fileId=file_id)
        file = io.BytesIO()
        downloader = MediaIoBaseDownload(file, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')

    except HttpError as error:
        print(F'An error occurred: {error}')
        # Return here: falling through used to call getvalue() on None.
        return None

    return file.getvalue()
if __name__ == '__main__':
download_file(real_file_id='1KuPmvGq8yoYgbfW74OENMCB5H0n_2Jm9')
This raises the question:
How do we get the file ID to download the file?
Generally speaking, a URL from a shared file from Google Drive looks like this
https://drive.google.com/file/d/1HV6vf8pB-EYnjcJcH65eGZVMa2v2tcMh/view?usp=sharing
where 1HV6vf8pB-EYnjcJcH65eGZVMa2v2tcMh corresponds to fileID.
You can simply copy it from the URL or, if you prefer, it's also possible to create a function to get the fileID from the URL.
For instance, given the following url = https://drive.google.com/file/d/1HV6vf8pB-EYnjcJcH65eGZVMa2v2tcMh/view?usp=sharing,
def url_to_id(url):
    """Return the sixth '/'-separated piece of ``url``.

    For a share link shaped like
    https://drive.google.com/file/d/<fileID>/view?usp=sharing that piece is the
    file ID.
    """
    x = url.split("/")
    # Index 5 is the sixth element of the split list.
    return x[5]
Printing x will give
['https:', '', 'drive.google.com', 'file', 'd', '1HV6vf8pB-EYnjcJcH65eGZVMa2v2tcMh', 'view?usp=sharing']
And so, as we want to return the 6th array value, we use x[5].
This has also been described above,
from pydrive.auth import GoogleAuth
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
This creates its own server to do the dirty work of authenticating
file_obj = drive.CreateFile({'id': '<Put the file ID here>'})
file_obj.GetContentFile('Demo.txt')
This downloads the file
import requests
def download_file_from_google_drive(id, destination):
    """Fetch the shared file ``id`` and save it at ``destination``.

    The first request already carries confirm=1. If the response still yields
    a confirmation token, the request is repeated with that token, and the
    final response is written to disk.
    """
    endpoint = "https://docs.google.com/uc?export=download"
    http_session = requests.Session()

    first_query = {'id': id, 'confirm': 1}
    reply = http_session.get(endpoint, params=first_query, stream=True)
    confirm_token = get_confirm_token(reply)
    if confirm_token:
        # Retry with the token supplied by the first response.
        retry_query = {'id': id, 'confirm': confirm_token}
        reply = http_session.get(endpoint, params=retry_query, stream=True)

    save_response_content(reply, destination)
def get_confirm_token(response):
    """Return the value of the first 'download_warning*' cookie, or None if there is none."""
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)
def save_response_content(response, destination):
    """Write the body of ``response`` to the file ``destination`` in 32 KiB chunks."""
    CHUNK_SIZE = 32768
    with open(destination, "wb") as out_file:
        # Empty chunks (keep-alive) are skipped.
        out_file.writelines(
            piece for piece in response.iter_content(CHUNK_SIZE) if piece
        )
if __name__ == "__main__":
file_id = 'TAKE ID FROM SHAREABLE LINK'
destination = 'DESTINATION FILE ON YOUR DISK'
download_file_from_google_drive(file_id, destination)
Just repeating the accepted answer but adding confirm=1 parameter so it always downloads even if the file is too big
# Importing [PyDrive][1] OAuth
from pydrive.auth import GoogleAuth
def download_tracking_file_by_id(file_id, download_dir):
    """Download a zip archive from Drive by file ID and unpack it.

    Saved PyDrive credentials are loaded (authenticating, refreshing or
    authorising as needed) and stored again, the file is downloaded to
    ``download_dir + 'mapmob.zip'`` and that archive is extracted into
    UNZIP_DIR.

    Args:
        file_id: ID of the Drive file to download.
        download_dir: directory prefix for the download; it is concatenated
            directly with the file names, so it should end with a separator.

    Returns:
        ``download_dir + 'test.json'``.
        NOTE(review): the archive is unpacked into UNZIP_DIR, so confirm that
        this is really where the JSON ends up.
    """
    # Local imports: this snippet only imports GoogleAuth at the top.
    import zipfile

    from pydrive.drive import GoogleDrive

    gauth = GoogleAuth(settings_file='../settings.yaml')
    # Try to load saved client credentials
    gauth.LoadCredentialsFile("../credentials.json")
    if gauth.credentials is None:
        # Authenticate if they're not there
        gauth.LocalWebserverAuth()
    elif gauth.access_token_expired:
        # Refresh them if expired
        gauth.Refresh()
    else:
        # Initialize the saved creds
        gauth.Authorize()
    # Save the current credentials to a file
    gauth.SaveCredentialsFile("../credentials.json")

    drive = GoogleDrive(gauth)
    logger.debug("Trying to download file_id %s", file_id)

    # Download and extract the same archive; the extraction used to open
    # 'test.zip', a file this function never writes.
    archive_path = download_dir + 'mapmob.zip'
    file6 = drive.CreateFile({'id': file_id})
    file6.GetContentFile(archive_path)
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(UNZIP_DIR)

    tracking_data_location = download_dir + 'test.json'
    return tracking_data_location
The above function downloads the file given the file_id to a specified downloads folder. Now the question remains, how to get the file_id? Simply split the url by id= to get the file_id.
file_id = url.split("id=")[1]
I tried using google Colaboratory: https://colab.research.google.com/
Suppose your sharable link is https://docs.google.com/spreadsheets/d/12hiI0NK7M0KEfscMfyBaLT9gxcZMleeu/edit?usp=sharing&ouid=102608702203033509854&rtpof=true&sd=true
all you need is id that is 12hiI0NK7M0KEfscMfyBaLT9gxcZMleeu
command in cell
!gdown 12hiI0NK7M0KEfscMfyBaLT9gxcZMleeu
run the cell and you will see that file is downloaded in /content/Amazon_Reviews.xlsx
Note: one should know how to use Google colab
This example is similar to RayB's, but keeps the file in memory
and is a little simpler; you can paste it into Colab and it works.
import googleapiclient.discovery
import oauth2client.client
from google.colab import auth
auth.authenticate_user()
def download_gdrive(id):
    """Return the content of the Drive file ``id``, fetched with the application-default credentials."""
    default_creds = oauth2client.client.GoogleCredentials.get_application_default()
    drive = googleapiclient.discovery.build('drive', 'v3', credentials=default_creds)
    media_request = drive.files().get_media(fileId=id)
    return media_request.execute()
a = download_gdrive("1F-yaQB8fdsfsdafm2l8WFjhEiYSHZrCcr")
You can install https://pypi.org/project/googleDriveFileDownloader/
pip install googleDriveFileDownloader
And download the file, here is the sample code to download
from googleDriveFileDownloader import googleDriveFileDownloader
a = googleDriveFileDownloader()
a.downloadFile("https://drive.google.com/uc?id=1O4x8rwGJAh8gRo8sjm0kuKFf6vCEm93G&export=download")

Categories