google.api_core.exceptions.InvalidArgument: 400 RecognitionAudio not set - python

I am having a problem with the Google Cloud Speech API, every time I run the script the error
six.raise_from (exceptions.from_grpc_error (exc), exc) occurs
File "<string>", line 3, in raise_from
google.api_core.exceptions.InvalidArgument: 400 RecognitionAudio not set.
It doesn't seem to recognize RecognitionAudio for some reason. I already checked the API documentation, but I couldn't solve the problem.
I don't understand the reason for the error, so I will leave my code here in case anyone knows and can help me. Thanks.
import telebot
import requests
from pydub import AudioSegment
import os
import io
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
# Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to a local key file path.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./chatbot.json"
# Token handed to TeleBot below.
# NOTE(review): hard-coded in source -- consider loading it from the environment instead.
token = "1233361335"
bot = telebot.TeleBot(token)
# Base URL for file downloads; the voice handler appends the file_path returned by bot.get_file().
downloadAudio = "https://api.telegram.org/file/bot{token}/".format(token = token)
@bot.message_handler(commands=['start'])
def send_welcome(message):
    """Reply to the /start command with a short greeting.

    The decorator registers this function with the bot; written as a comment
    (``#bot.message_handler``) the handler would never be called.
    """
    bot.reply_to(message, "welcome")
@bot.message_handler(content_types=['voice'])
def handlerAudio(message):
    """Transcribe a Telegram voice message with Google Cloud Speech-to-Text.

    Downloads the voice note, converts it to a mono WAV file and prints every
    transcript returned by the recognizer.

    Args:
        message: the incoming telebot message; its ``voice`` attribute is used.
    """
    # Resolve the download link of the voice note and fetch it.
    audioPath = bot.get_file(message.voice.file_id).file_path
    audioFile = requests.get(downloadAudio + audioPath)
    audioName = "audio.ogg"
    # Save locally; the context manager closes the handle deterministically
    # before pydub opens the same file.
    with open(audioName, 'wb') as ogg_file:
        ogg_file.write(audioFile.content)
    # Convert to mono WAV in a single export.
    sound = AudioSegment.from_file(audioName).set_channels(1)
    sound.export("audio.wav", format="wav")
    client = speech.SpeechClient()
    with io.open("audio.wav", 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        # Use the rate of the decoded audio instead of assuming 48 kHz.
        sample_rate_hertz=sound.frame_rate,
        language_code='pt-BR')
    # Pass config/audio by keyword: newer client versions take a request
    # object as the first positional parameter, so a positional call leaves
    # the audio unset ("RecognitionAudio not set").
    response = client.recognize(config=config, audio=audio)
    for result in response.results:
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))
        #bot.reply_to(message, result.alternatives[0].transcript)


bot.polling()

Related

how to send media album from local storage on telebot?

I'm trying to make a Telegram bot that sends media from local storage, and I got the error below.
Also, if there is a list with more than 10 items in it and you try to send it as an album, does Telegram automatically separate the items into different albums?
A request to the Telegram API was unsuccessful. Error code: 400. Description: Bad Request: wrong HTTP URL specified
import telebot
import glob
import os
from telebot.types import InputMediaPhoto, InputMediaVideo
bot = telebot.TeleBot("")
# NOTE(review): the leading '#' on the next line looks like a mangled '@' decorator;
# as written, the function below is not registered as a handler -- confirm against the original post.
#bot.message_handler(commands=['test'])
def test(message):
# Chat that the media is sent to.
id = message.chat.id
path = "./vid/*.mp4"
vid_media = []
# Debug listing of the files matched by the glob pattern; may be removed.
for files in glob.glob(path):
print(files)
# Wraps each bare name returned by os.listdir (no "./vid/" prefix, not limited
# to *.mp4) in InputMediaVideo.
for i in os.listdir("./vid/"):
vid_media.append(InputMediaVideo(i))
bot.send_message(id, "Sending videos...")
# Here i is an InputMediaVideo object, not a path, yet it is passed to open();
# the opened handle f is unused and the whole list is sent once per item.
for i in vid_media:
with open(i, 'rb') as f:
bot.send_media_group(id, vid_media)
bot.polling()
telebot can't use a path to a local file directly.
You have to send the content as bytes:
with open(filename, 'rb') as fh: # open in `byte mode`
data = fh.read() # read bytes
media = InputMediaVideo(data) # put bytes
vid_media.append(media)
Full working code:
import os
import glob
import telebot
from telebot.types import InputMediaPhoto, InputMediaVideo
TOKEN = os.getenv('TELEGRAM_TOKEN')
#print('TOKEN:', TOKEN)
bot = telebot.TeleBot(TOKEN)
@bot.message_handler(commands=['test'])
def test(message):
    """Send every ``./vid/*.mp4`` file to the chat that issued /test.

    Telegram accepts between 2 and 10 items per media group, so the videos
    are sent in albums of at most 10; a single left-over video is sent as a
    normal video message.

    Args:
        message: the incoming telebot message; replies go to ``message.chat.id``.
    """
    max_group_size = 10  # upper bound of items in one Telegram media group
    chat_id = message.chat.id
    filenames = glob.glob("./vid/*.mp4")
    if not filenames:
        # An empty media group would be rejected by the API.
        bot.send_message(chat_id, "No videos found.")
        return
    bot.send_message(chat_id, "Sending videos...")
    for start in range(0, len(filenames), max_group_size):
        batch = filenames[start:start + max_group_size]
        if len(batch) == 1:
            # A media group needs at least two items.
            print('read:', batch[0])
            with open(batch[0], 'rb') as fh:
                bot.send_video(chat_id, fh)
            continue
        vid_media = []
        for filename in batch:
            print('read:', filename)
            # telebot needs the content as bytes, not a local path.
            with open(filename, 'rb') as fh:
                vid_media.append(InputMediaVideo(fh.read()))
        bot.send_media_group(chat_id, vid_media)
bot.polling()
EDIT:
Different modules may have different functionalities.
This code uses module telebot (pyTelegramBotAPI) and it can't use local path
but it seems module telegram can use pathlib.Path with local path in its InputMediaVideo.

dialogflow - ImportError: cannot import name 'AgentsClient'

I'm a noob with Dialogflow. Here is the audio-to-text code that I'm using.
I googled the error but couldn't find a solution. Any help is appreciated. Thank you!
import os
import dialogflow_v2 as dialogflow
from dialogflow_v2 import AgentsClient
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"D:\Resume building\gcloudtstuff24Jan2020\testing-302710-cc7fef4033ff.json"
project_id = '*******'
session_id = "ayesha"
audio_file_path = r'C:\Users\ayesha\Downloads\sample - mp3 inserted.wav'
language_code = 'en'
def detect_intent_audio(project_id, session_id, audio_file_path, language_code):
    """Run detect-intent on an audio file and print the result.

    Using the same `session_id` between requests allows continuation
    of the conversation.

    Args:
        project_id: project used to build the session path.
        session_id: session identifier used to build the session path.
        audio_file_path: path of the audio file, read as raw bytes.
        language_code: language code placed in the input audio config.

    Returns:
        None. The query text, detected intent, confidence and fulfillment
        text are printed.
    """
    # Imported locally; inside this function the name shadows the
    # module-level `dialogflow_v2 as dialogflow` alias.
    from google.cloud import dialogflow

    session_client = dialogflow.SessionsClient()

    # Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
    audio_encoding = dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16
    sample_rate_hertz = 16000

    session = session_client.session_path(project_id, session_id)
    print("Session path: {}\n".format(session))

    with open(audio_file_path, "rb") as audio_file:
        input_audio = audio_file.read()

    audio_config = dialogflow.InputAudioConfig(
        audio_encoding=audio_encoding,
        language_code=language_code,
        sample_rate_hertz=sample_rate_hertz,
    )
    query_input = dialogflow.QueryInput(audio_config=audio_config)

    request = dialogflow.DetectIntentRequest(
        session=session,
        query_input=query_input,
        input_audio=input_audio,
    )
    response = session_client.detect_intent(request=request)

    print("=" * 20)
    print("Query text: {}".format(response.query_result.query_text))
    print(
        "Detected intent: {} (confidence: {})\n".format(
            response.query_result.intent.display_name,
            response.query_result.intent_detection_confidence,
        )
    )
    print("Fulfillment text: {}\n".format(response.query_result.fulfillment_text))
detect_intent_audio(project_id, session_id, audio_file_path, language_code) dialogflow services.
And I am getting
ImportError: cannot import name 'AgentsClient'
I was creating my json file directly from the project I already have in my google cloud. I created a new project in the dialogue flow and it worked.

How to attach large files to an email using Python - Gmail API

I'm trying to send an email with an attachment (ideally multiple attachments) that are larger than 10 MB and smaller than the limit of 25 MB in total. The reason I mention 10 MB is because it seems to be the lower bound for when the normal way of attaching files stops working and you get Error 10053.
I've read in the documentation that the best way to do this would be by using the resumable upload method but I haven't been able to get it to work nor have I been able to find any good examples in Python. Most of the SO questions on this simply link back to the documentation which doesn't have a Python example or their code resulted in other errors.
I'm looking for an explanation in Python because I want to make sure I understand it correctly.
Questions I've looked through:
Attaching a file using Resumable upload w/ Gmail API
Gmail Api resumable upload Rest( attachment larger than 5MB)
using /upload urls with Google API client
How to upload large messages to Gmail
Error 10053 When Sending Large Attachments using Gmail API
Sending email via gmail & python
MIMEMultipart, MIMEText, MIMEBase, and payloads for sending email with file attachment in Python
Code:
import base64
import json
import os
from email import utils, encoders
from email.message import EmailMessage
from email.mime import application, multipart, text, base, image, audio
import mimetypes
from apiclient import errors
from googleapiclient import discovery, http
from google.oauth2 import service_account
# Builds a multipart e-mail (HTML body plus one MIME part per entry in `files`)
# and sends it through the Gmail API with delegated service-account credentials.
# NOTE(review): the '#' in the email_sender default looks like a mangled '@' -- confirm.
def send_email(email_subject, email_body, email_sender='my_service_account#gmail.com', email_to='', email_cc='', email_bcc='', files=None):
# Getting credentials: the SERVICE_KEY_PASSWORD environment variable holds the path of the JSON key file
with open(os.environ.get('SERVICE_KEY_PASSWORD')) as f:
service_account_info = json.loads(f.read())
# Define which scopes we're trying to access
SCOPES = ['https://www.googleapis.com/auth/gmail.send']
# Setting up credentials using the gmail api
credentials = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
# This allows us to assign an alias account to the message so that the messages aren't coming from 'ServiceDriod-8328balh blah blah'
delegated_credentials = credentials.with_subject(email_sender)
# 'Building' the service instance using the credentials we've passed
service = discovery.build(serviceName='gmail', version='v1', credentials=delegated_credentials)
# Building out the email
message = multipart.MIMEMultipart()
message['to'] = email_to
message['from'] = email_sender
message['date'] = utils.formatdate(localtime=True)
message['subject'] = email_subject
message['cc'] = email_cc
message['bcc'] = email_bcc
message.attach(text.MIMEText(email_body, 'html'))
for f in files or []:
mimetype, encoding = mimetypes.guess_type(f)
# If the extension is not recognized it will return: (None, None)
# If it's an .mp3, it will return: (audio/mp3, None) (None is for the encoding)
# For an unrecognized extension we set mimetype to 'application/octet-stream' so it won't return None again.
if mimetype is None or encoding is not None:
mimetype = 'application/octet-stream'
main_type, sub_type = mimetype.split('/', 1)
# Creating the attachment:
# The MIME class is chosen from the main type; every branch reads the file in binary mode.
if main_type == 'text':
print('text')
with open(f, 'rb') as outfile:
attachement = text.MIMEText(outfile.read(), _subtype=sub_type)
elif main_type == 'image':
print('image')
with open(f, 'rb') as outfile:
attachement = image.MIMEImage(outfile.read(), _subtype=sub_type)
elif main_type == 'audio':
print('audio')
with open(f, 'rb') as outfile:
attachement = audio.MIMEAudio(outfile.read(), _subtype=sub_type)
elif main_type == 'application' and sub_type == 'pdf':
with open(f, 'rb') as outfile:
attachement = application.MIMEApplication(outfile.read(), _subtype=sub_type)
else:
# Any other type: generic MIME part with a base64-encoded payload.
attachement = base.MIMEBase(main_type, sub_type)
with open(f, 'rb') as outfile:
attachement.set_payload(outfile.read())
encoders.encode_base64(attachement)
attachement.add_header('Content-Disposition', 'attachment', filename=os.path.basename(f))
message.attach(attachement)
# Only files[0] is wrapped as the resumable media body, while the complete MIME
# message (attachments included) is also sent base64-encoded as 'raw' in `body`.
# files[0] is evaluated even when `files` is left at its default of None.
media_body = http.MediaFileUpload(files[0], chunksize=500, resumable=True)
print('Uploading large file...')
body = {'raw': base64.urlsafe_b64encode(message.as_bytes()).decode()}
# This is the call reported in the ResumableUploadError traceback.
message = (service.users().messages().send(userId='me', body=body, media_body=media_body).execute())
Note: Right now, in the MediaFileUpload I'm using files[0] because I'm only using one file for testing and I just wanted to attach one file for now until it works.
Error:
Exception has occurred: ResumableUploadError
<HttpError 400 "Bad Request">
File "C:\Users\CON01599\AppData\Local\Continuum\anaconda3\Lib\site-packages\googleapiclient\http.py", line 927, in next_chunk
raise ResumableUploadError(resp, content)
File "C:\Users\CON01599\AppData\Local\Continuum\anaconda3\Lib\site-packages\googleapiclient\_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\CON01599\AppData\Local\Continuum\anaconda3\Lib\site-packages\googleapiclient\http.py", line 822, in execute
_, body = self.next_chunk(http=http, num_retries=num_retries)
File "C:\Users\CON01599\AppData\Local\Continuum\anaconda3\Lib\site-packages\googleapiclient\_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\CON01599\Documents\GitHub\pipelines\components\email\send_email.py", line 105, in send_email
message = (service.users().messages().send(userId='me', body=body, media_body=media_body).execute())
Answer:
import base64
import io
import json
import os
from email import utils, encoders
from email.message import EmailMessage
from email.mime import application, multipart, text, base, image, audio
import mimetypes
from apiclient import errors
from googleapiclient import discovery, http
from google.oauth2 import service_account
def get_environment_variables():
    """Collect the e-mail settings from the process environment.

    Returns:
        dict: keys 'to', 'subject', 'body' and 'file', each holding the value
        of the TO / SUBJECT / BODY / FILE environment variable, or None when
        the variable is not set.
    """
    names = ('to', 'subject', 'body', 'file')
    # Each key maps to the upper-cased environment variable of the same name.
    return {name: os.environ.get(name.upper()) for name in names}
def _build_attachment(path):
    """Return a MIME part for the file at *path*, typed from its extension."""
    mimetype, encoding = mimetypes.guess_type(path)
    # An unrecognized extension gives (None, None); a compressed file reports
    # an encoding. Both are sent as opaque binary.
    if mimetype is None or encoding is not None:
        mimetype = 'application/octet-stream'
    main_type, sub_type = mimetype.split('/', 1)
    with open(path, 'rb') as infile:
        payload = infile.read()
    if main_type == 'text':
        # MIMEText requires str; passing the raw bytes raises AttributeError.
        attachment = text.MIMEText(payload.decode('utf-8', errors='replace'), _subtype=sub_type)
    elif main_type == 'image':
        attachment = image.MIMEImage(payload, _subtype=sub_type)
    elif main_type == 'audio':
        attachment = audio.MIMEAudio(payload, _subtype=sub_type)
    elif main_type == 'application' and sub_type == 'pdf':
        attachment = application.MIMEApplication(payload, _subtype=sub_type)
    else:
        attachment = base.MIMEBase(main_type, sub_type)
        attachment.set_payload(payload)
        encoders.encode_base64(attachment)
    attachment.add_header('Content-Disposition', 'attachment', filename=os.path.basename(path))
    return attachment


def send_email(email_subject, email_body, email_sender='my_service_account@gmail.com', email_to='', email_cc='', email_bcc='', files=None):
    """Send an HTML e-mail with optional attachments through the Gmail API.

    The complete RFC 822 message (body and attachments) is uploaded as one
    resumable media upload, which is what allows large attachments.

    Args:
        email_subject: subject line.
        email_body: HTML body.
        email_sender: account the service account sends on behalf of.
        email_to: value of the To header.
        email_cc: value of the Cc header; omitted when empty.
        email_bcc: value of the Bcc header; omitted when empty.
        files: iterable of attachment paths, or None for no attachments.

    Returns:
        The API response on success, or None when the API reported an
        HttpError (the error is printed).
    """
    # The SERVICE_KEY_PASSWORD environment variable holds the path of the
    # service-account JSON key file.
    with open(os.environ.get('SERVICE_KEY_PASSWORD')) as f:
        service_account_info = json.load(f)
    # Define which scopes we're trying to access
    SCOPES = ['https://www.googleapis.com/auth/gmail.send']
    credentials = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
    # Delegate to email_sender so the message is sent from that account
    # rather than from the service account itself.
    delegated_credentials = credentials.with_subject(email_sender)
    service = discovery.build(serviceName='gmail', version='v1', credentials=delegated_credentials)

    # Building out the email
    message = multipart.MIMEMultipart()
    message['to'] = email_to
    message['from'] = email_sender
    message['date'] = utils.formatdate(localtime=True)
    message['subject'] = email_subject
    # Skip empty Cc/Bcc instead of emitting blank headers.
    if email_cc:
        message['cc'] = email_cc
    if email_bcc:
        message['bcc'] = email_bcc
    message.attach(text.MIMEText(email_body, 'html'))

    for f in files or []:
        f = f.strip(' ')
        if not f:
            # Tolerate blank entries such as a trailing comma in the list.
            continue
        message.attach(_build_attachment(f))

    # Upload the whole message, not a single attachment, as the media body.
    media_body = http.MediaIoBaseUpload(io.BytesIO(message.as_bytes()), mimetype='message/rfc822', resumable=True)
    body_metadata = {}  # no thread, no labels in this example
    try:
        print('Uploading file...')
        response = service.users().messages().send(userId='me', body=body_metadata, media_body=media_body).execute()
        print(response)
    except errors.HttpError as error:
        print('An error occurred when sending the email:\n{}'.format(error))
        return None
    return response
if __name__ == '__main__':
    env_var_dict = get_environment_variables()
    # FILE is optional: an unset variable yields None, which has no .split().
    # Blank entries (e.g. from a trailing comma) are dropped.
    attachment_paths = [
        path for path in (env_var_dict['file'] or '').split(',') if path.strip()
    ]
    print("Sending email...")
    send_email(email_subject=env_var_dict['subject'],
               email_body=env_var_dict['body'],
               email_to=env_var_dict['to'],
               files=attachment_paths)
    print("Email sent!")
The issue you're having here is that your MediaUpload is a single attachment.
Instead of uploading a single attachment as a resumable MediaUpload, you need to upload the entire RFC822 message as a resumable MediaUpload.
In other words:
import ...
...
from io import BytesIO
from googleapiclient.http import MediaIoBaseUpload
SCOPES = [ 'scopes' ]
creds = get_credentials_somehow()
gmail = get_authed_service_somehow()
msg = create_rfc822_message(headers, email_body)
to_attach = get_attachment_paths_from_dir('../reports/tps/memos/2019/04')
add_attachments(msg, to_attach)
media = MediaIoBaseUpload(BytesIO(msg.as_bytes()), mimetype='message/rfc822', resumable=True)
body_metadata = {} # no thread, no labels in this example
resp = gmail.users().messages().send(userId='me', body=body_metadata, media_body=media).execute()
print(resp)
# { "id": "some new id", "threadId": "some new thread id", "labelIds": ["SENT"]}
I pieced this together from your provided code, reviewing this GitHub issue and Google's Inbox-to-Gmail email importer, specificially this bit.
When sending replies to existing messages, you will almost certainly have some sort of metadata that you should provide to help Gmail keep track of your new response and the original conversation. Namely, instead of an empty body parameter, you would pass informative metadata such as
body_metadata = { 'labelIds': [
"your label id here",
"another label id" ],
'threadId': "some thread id you took from the message you're replying to"
}
Other good refs:
API Client's Gmail PyDoc
Actual code used
You mention the attachment being larger than 10 MB, but make sure it is also smaller than 25 MB: Gmail does not allow attachments larger than 25 MB, so if that is your case, there's simply no way to get this done, as it is beyond Gmail's limits.
The explanation can be found here.
Can you confirm that your attachment is not too large?

Is it possible to use Google vision API offline in Python?

I have a simple Python application which uses Google vision API. How can I make it to work offline? I searched a lot but couldn't find anything useful. This question is about android app and it seems that for that case the answer is positive. Here is my code:
from google.cloud import vision
from google.cloud.vision import types
from google.oauth2 import service_account
# Authenticate with a service-account key file and create the Vision client.
credentials = service_account.Credentials.from_service_account_file('key.json')
client = vision.ImageAnnotatorClient(credentials=credentials)
# The built-in open() is used because this snippet never imports io
# (in Python 3, io.open is the same function).
with open('my_figure.jpg', 'rb') as image_file:
    content = image_file.read()
image_context = types.ImageContext(language_hints=["en"])
image = types.Image(content=content)
response = client.text_detection(image=image, image_context=image_context)
texts = response.text_annotations
# Print the description of every returned text annotation.
for text in texts:
    print('\n"{}"'.format(text.description))

Python: download files from google drive using url

I am trying to download files from google drive and all I have is the drive's URL.
I have read about google API that talks about some drive_service and MedioIO, which also requires some credentials( mainly JSON file/OAuth). But I am unable to get any idea about how it is working.
Also, tried urllib2.urlretrieve, but my case is to get files from the drive. Tried wget too but no use.
Tried PyDrive library. It has good upload functions to drive but no download options.
Any help will be appreciated.
Thanks.
If by "drive's url" you mean the shareable link of a file on Google Drive, then the following might help:
import requests
def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file to a local path.

    Args:
        id: file ID taken from the shareable link.
        destination: path of the local file to write.
    """
    URL = "https://docs.google.com/uc?export=download"
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)
        if token:
            # A download-warning cookie was set: repeat the request with the
            # cookie's value as the `confirm` parameter.
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
        save_response_content(response, destination)
def get_confirm_token(response):
    """Return the download-warning cookie value of *response*, if any.

    Args:
        response: object whose ``cookies.items()`` yields (name, value) pairs.

    Returns:
        The value of the first cookie whose name starts with
        'download_warning', or None when there is no such cookie.
    """
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value
    # Only reached after every cookie has been inspected.
    return None
def save_response_content(response, destination):
    """Stream the body of *response* into the file at *destination*.

    Args:
        response: object exposing ``iter_content(chunk_size)``.
        destination: path of the file to (over)write in binary mode.
    """
    CHUNK_SIZE = 32768
    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
if __name__ == "__main__":
    # Placeholders: replace both values before running the script.
    file_id = 'TAKE ID FROM SHAREABLE LINK'
    destination = 'DESTINATION FILE ON YOUR DISK'
    download_file_from_google_drive(file_id, destination)
The snippet does not use pydrive or the Google Drive SDK, though. It uses the requests module (which is, in a way, an alternative to urllib2).
When downloading large files from Google Drive, a single GET request is not sufficient. A second one is needed - see wget/curl large file from google drive.
I recommend gdown package.
pip install gdown
Take your share link
https://drive.google.com/file/d/0B9P1L--7Wd2vNm9zMTJWOGxobkU/view?usp=sharing
and grab the file ID from it (e.g. 1TLNdIufzwesDbyr_nVTR7Zrx9oRHLM_N) — you can also find it in the link behind the download button — then substitute it after `id=` in the URL below.
import gdown
url = 'https://drive.google.com/uc?id=0B9P1L--7Wd2vNm9zMTJWOGxobkU'
output = '20150428_collected_images.tgz'
gdown.download(url, output, quiet=False)
Having had similar needs many times, I made an extra-simple class, GoogleDriveDownloader, starting from the snippet by @user115202 above. You can find the source code here.
You can also install it through pip:
pip install googledrivedownloader
Then usage is as simple as:
from google_drive_downloader import GoogleDriveDownloader as gdd
gdd.download_file_from_google_drive(file_id='1iytA1n2z4go3uVCwE__vIKouTKyIDjEq',
dest_path='./data/mnist.zip',
unzip=True)
This snippet will download an archive shared in Google Drive. In this case 1iytA1n2z4go3uVCwE__vIKouTKyIDjEq is the id of the sharable link got from Google Drive.
Here's an easy way to do it with no third-party libraries and a service account.
pip install google-api-core google-api-python-client
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google.oauth2 import service_account
import io
credz = {}  # put the JSON credentials here (from a service account or the like)
# More info: https://cloud.google.com/docs/authentication
credentials = service_account.Credentials.from_service_account_info(credz)
drive_service = build('drive', 'v3', credentials=credentials)
file_id = '0BwwA4oUTeiV1UVNwOHItT0xfa2M'
request = drive_service.files().get_media(fileId=file_id)
# Use io.BytesIO() instead of the FileIO below to keep the download in memory.
# The context manager closes the output file once the download has finished.
with io.FileIO('file.tar.gz', 'wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        # Each call fetches one chunk and reports the progress so far.
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
PyDrive allows you to download a file with the function GetContentFile(). You can find the function's documentation here.
See example below:
# Initialize GoogleDriveFile instance with file id.
file_obj = drive.CreateFile({'id': '<your file ID here>'})
file_obj.GetContentFile('cats.png') # Download file as 'cats.png'.
This code assumes that you have an authenticated drive object, the docs on this can be found here and here.
In the general case this is done like so:
from pydrive.auth import GoogleAuth
# GoogleDrive is used below and must be imported as well.
from pydrive.drive import GoogleDrive

gauth = GoogleAuth()
# Create local webserver which automatically handles authentication.
gauth.LocalWebserverAuth()
# Create GoogleDrive instance with authenticated GoogleAuth instance.
drive = GoogleDrive(gauth)
Info on silent authentication on a server can be found here and involves writing a settings.yaml (example: here) in which you save the authentication details.
There's in the docs a function that downloads a file when we provide an ID of the file to download,
from __future__ import print_function
import io
import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
def download_file(real_file_id):
    """Download a Drive file into memory.

    Args:
        real_file_id: ID of the file to download.

    Returns:
        bytes: the file content, or None when the Drive API reported an
        HttpError (the error is printed).

    Loads pre-authorized user credentials from the environment.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.
    """
    creds, _ = google.auth.default()

    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)

        file_id = real_file_id

        # pylint: disable=maybe-no-member
        request = service.files().get_media(fileId=file_id)
        file = io.BytesIO()
        downloader = MediaIoBaseDownload(file, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')

    except HttpError as error:
        print(F'An error occurred: {error}')
        # Return early: falling through with file = None would raise
        # AttributeError on file.getvalue().
        return None

    return file.getvalue()


if __name__ == '__main__':
    download_file(real_file_id='1KuPmvGq8yoYgbfW74OENMCB5H0n_2Jm9')
This raises the question:
How do we get the file ID to download the file?
Generally speaking, a URL from a shared file from Google Drive looks like this
https://drive.google.com/file/d/1HV6vf8pB-EYnjcJcH65eGZVMa2v2tcMh/view?usp=sharing
where 1HV6vf8pB-EYnjcJcH65eGZVMa2v2tcMh corresponds to fileID.
You can simply copy it from the URL or, if you prefer, it's also possible to create a function to get the fileID from the URL.
For instance, given the following url = https://drive.google.com/file/d/1HV6vf8pB-EYnjcJcH65eGZVMa2v2tcMh/view?usp=sharing,
def url_to_id(url):
    """Extract the file ID from a Google Drive / Docs sharing URL.

    Handles both ``.../d/<id>/...`` links and ``...?id=<id>`` links.

    Args:
        url: the sharing URL.

    Returns:
        str: the file ID.

    Raises:
        IndexError: if neither form matches and the URL has fewer than six
            '/'-separated parts (the original positional fallback).
    """
    import re

    match = (re.search(r"/d/([A-Za-z0-9_-]+)", url)
             or re.search(r"[?&]id=([A-Za-z0-9_-]+)", url))
    if match:
        return match.group(1)
    # Fallback: the original behavior, the sixth '/'-separated component.
    x = url.split("/")
    return x[5]
Printing x will give
['https:', '', 'drive.google.com', 'file', 'd', '1HV6vf8pB-EYnjcJcH65eGZVMa2v2tcMh', 'view?usp=sharing']
And so, as we want to return the 6th array value, we use x[5].
This has also been described above,
from pydrive.auth import GoogleAuth
# GoogleDrive is used below and must be imported as well.
from pydrive.drive import GoogleDrive

gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
This creates its own server to do the dirty work of authenticating
file_obj = drive.CreateFile({'id': '<Put the file ID here>'})
file_obj.GetContentFile('Demo.txt')
This downloads the file
import requests
def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file to a local path.

    The first request already carries ``confirm=1``.

    Args:
        id: file ID taken from the shareable link.
        destination: path of the local file to write.
    """
    URL = "https://docs.google.com/uc?export=download"
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id, 'confirm': 1}, stream=True)
        token = get_confirm_token(response)
        if token:
            # A download-warning cookie was still set: repeat the request
            # with the cookie's value as the `confirm` parameter.
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
        save_response_content(response, destination)
def get_confirm_token(response):
    """Return the download-warning cookie value of *response*, if any.

    Args:
        response: object whose ``cookies.items()`` yields (name, value) pairs.

    Returns:
        The value of the first cookie whose name starts with
        'download_warning', or None when there is no such cookie.
    """
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value
    # Only reached after every cookie has been inspected.
    return None
def save_response_content(response, destination):
    """Stream the body of *response* into the file at *destination*.

    Args:
        response: object exposing ``iter_content(chunk_size)``.
        destination: path of the file to (over)write in binary mode.
    """
    CHUNK_SIZE = 32768
    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
if __name__ == "__main__":
    # Placeholders: replace both values before running the script.
    file_id = 'TAKE ID FROM SHAREABLE LINK'
    destination = 'DESTINATION FILE ON YOUR DISK'
    download_file_from_google_drive(file_id, destination)
Just repeating the accepted answer but adding confirm=1 parameter so it always downloads even if the file is too big
# Importing [PyDrive][1] OAuth
from pydrive.auth import GoogleAuth
def download_tracking_file_by_id(file_id, download_dir):
    """Download a zip archive from Google Drive by file ID and extract it.

    Args:
        file_id: Drive ID of the archive.
        download_dir: directory prefix (joined by plain concatenation, so it
            must end with a path separator) where the archive is saved.

    Returns:
        str: ``download_dir + 'test.json'``.
    """
    # Local imports: the snippet uses both names without importing them.
    import zipfile
    from pydrive.drive import GoogleDrive

    gauth = GoogleAuth(settings_file='../settings.yaml')
    # Try to load saved client credentials
    gauth.LoadCredentialsFile("../credentials.json")
    if gauth.credentials is None:
        # Authenticate if they're not there
        gauth.LocalWebserverAuth()
    elif gauth.access_token_expired:
        # Refresh them if expired
        gauth.Refresh()
    else:
        # Initialize the saved creds
        gauth.Authorize()
    # Save the current credentials to a file
    gauth.SaveCredentialsFile("../credentials.json")
    drive = GoogleDrive(gauth)
    # NOTE(review): `logger` and `UNZIP_DIR` are not defined in this snippet;
    # presumably module-level names in the original project -- confirm.
    logger.debug("Trying to download file_id " + str(file_id))
    # Extract the archive that was actually downloaded; the original saved
    # 'mapmob.zip' but then opened 'test.zip'.
    archive_path = download_dir + 'mapmob.zip'
    file6 = drive.CreateFile({'id': file_id})
    file6.GetContentFile(archive_path)
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(UNZIP_DIR)
    tracking_data_location = download_dir + 'test.json'
    return tracking_data_location
The above function downloads the file given the file_id to a specified downloads folder. Now the question remains, how to get the file_id? Simply split the url by id= to get the file_id.
file_id = url.split("id=")[1]
I tried using google Colaboratory: https://colab.research.google.com/
Suppose your sharable link is https://docs.google.com/spreadsheets/d/12hiI0NK7M0KEfscMfyBaLT9gxcZMleeu/edit?usp=sharing&ouid=102608702203033509854&rtpof=true&sd=true
all you need is id that is 12hiI0NK7M0KEfscMfyBaLT9gxcZMleeu
command in cell
!gdown 12hiI0NK7M0KEfscMfyBaLT9gxcZMleeu
run the cell and you will see that file is downloaded in /content/Amazon_Reviews.xlsx
Note: one should know how to use Google colab
This example is similar to RayB's answer, but keeps the file in memory
and is a little simpler; you can paste it into Colab and it works.
import googleapiclient.discovery
import oauth2client.client
from google.colab import auth
auth.authenticate_user()
def download_gdrive(id):
    """Download a Drive file into memory using application-default credentials.

    Args:
        id: Drive file ID.

    Returns:
        Whatever ``files().get_media(fileId=id).execute()`` returns
        (presumably the file content as bytes -- confirm against the API docs).
    """
    creds = oauth2client.client.GoogleCredentials.get_application_default()
    service = googleapiclient.discovery.build('drive', 'v3', credentials=creds)
    return service.files().get_media(fileId=id).execute()
a = download_gdrive("1F-yaQB8fdsfsdafm2l8WFjhEiYSHZrCcr")
You can install https://pypi.org/project/googleDriveFileDownloader/
pip install googleDriveFileDownloader
And download the file, here is the sample code to download
from googleDriveFileDownloader import googleDriveFileDownloader
a = googleDriveFileDownloader()
a.downloadFile("https://drive.google.com/uc?id=1O4x8rwGJAh8gRo8sjm0kuKFf6vCEm93G&export=download")

Categories