PyDrive upload speed is too slow - python

I'm using PyDrive to upload files from my RPi to a specific folder in my Google Drive. It works, but it is terribly slow. A .npy file (binary NumPy file) that is only 40 kB takes around 2 seconds to upload. A different file (.pptx) that is 2 MB takes around 5 seconds. I also tried this on my Mac, and the upload times are the same.
Is there a better way to do this? I need each upload to take less than a second, since I'm collecting data every second. Here is the code:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import os
import time
credentials = '/***/pydrive_credentials.txt'
directory = '/***/remote_dir'
gauth = GoogleAuth()
gauth.LoadCredentialsFile(credentials)
# gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
# get id of designated folder in Google Drive
folder = drive.ListFile({'q': "title = 'sample pydrive folder' and trashed=false"}).GetList()[0]
for filename in os.listdir(directory):
    f = drive.CreateFile({'title': filename, 'parents': [{'id': folder['id']}]})
    # f = drive.CreateFile()
    filepath = os.path.join(directory, filename)
    f.SetContentFile(filepath)
    start = time.time()
    f.Upload()
    end = time.time()
    print(end - start)
    # delete file after upload
    # os.remove(filepath)
    # to ensure no memory leakage
    f = None
    filepath = None
    print("Uploaded: {}".format(filename))

Related

Google API/Python: Delete old files in Google Drive by filename in local folder, upload new filenames from local folder

I have a problem which was hard to summarize in the title. I have this script, written with a lot of help from @Tanaike. The script basically does two things:
Deletes files from a Google Drive folder by the filenames which are in the local folder CSVtoGD (using spreadsheet IDs)
then:
Uploads the list of CSVs from the local folder "CSVtoGD" to the Google Drive folder
I have a big problem now and cannot work it out. The script deletes the old files in Google Drive when the same filenames exist in CSVtoGD. But when I add a new file to the local folder CSVtoGD, I get a "list index out of range" error and "No files found" is printed, as in the script. I tried to make some modifications, but they were blind shots. What I want this script to do is: delete from the Google Drive folder ONLY the files which are in the local CSVtoGD folder, and simply upload the rest of the files in CSVtoGD. Does anyone have an answer to that? Thank you :)
import gspread
import os
from googleapiclient.discovery import build

gc = gspread.oauth(credentials_filename='/users/user/credentials.json')
service = build("drive", "v3", credentials=gc.auth)

def getSpreadsheetId(filename):
    q = "name='" + filename + "' and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false"
    res = service.files().list(q=q, fields="files(id)", corpora="allDrives", includeItemsFromAllDrives=True, supportsAllDrives=True).execute()
    items = res.get("files", [])
    if not items:
        print("No files found.")
        exit()
    return items[0]["id"]

os.chdir('/users/user/CSVtoGD2')
files = os.listdir()
for filename in files:
    fname = filename.split(".")
    if fname[1] == "csv":
        folder_id = '1z_pUvZyt5AoTNy-aKCKLmlNjdR2OPo'
        oldSpreadsheetId = getSpreadsheetId(fname[0])
        # print(oldSpreadsheetId)
        sh = gc.del_spreadsheet(oldSpreadsheetId)
        # IF there are the same filenames in the CSVtoGD folder on my Mac
        # and the same filenames in the Google Drive folder,
        # those lines work well.
        # The problem is when there are new files in the local CSVtoGD folder on the Mac.
        sh = gc.create(fname[0], folder_id)
        content = open(filename, "r").read().encode("utf-8")
        gc.import_csv(sh.id, content)
I believe your goal is as follows.
For example, when sample.csv exists on your local PC and a Spreadsheet named sample exists in your Google Drive, you want to delete the Spreadsheet sample from your Google Drive.
When sample1.csv exists on your local PC and a Spreadsheet named sample1 does NOT exist in your Google Drive, you want to upload sample1.csv to Google Drive.
In this case, how about the following modification?
Modified script:
import gspread
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

gc = gspread.oauth(credentials_filename='/users/user/credentials.json')
service = build("drive", "v3", credentials=gc.auth)
folder_id = '1z_pUvZyt5AoTNy-aKCKLmlNjdR2OPo'  # Please set the folder ID you want to upload the file to.

def getSpreadsheetId(filename, filePath):
    q = "name='" + filename + "' and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false"
    res = service.files().list(q=q, fields="files(id)", corpora="allDrives", includeItemsFromAllDrives=True, supportsAllDrives=True).execute()
    items = res.get("files", [])
    if not items:
        print("No files found.")
        file_metadata = {
            "name": filename,
            "parents": [folder_id],
            "mimeType": "application/vnd.google-apps.spreadsheet",
        }
        media = MediaFileUpload(filePath + "/" + filename + ".csv")
        file = service.files().create(body=file_metadata, media_body=media, fields="id").execute()
        id = file.get("id")
        print("File was uploaded. The file ID is " + id)
        exit()
    return items[0]["id"]

filePath = '/users/user/CSVtoGD2'
os.chdir(filePath)
files = os.listdir()
for filename in files:
    fname = filename.split(".")
    if fname[1] == "csv":
        oldSpreadsheetId = getSpreadsheetId(fname[0], filePath)
        print(oldSpreadsheetId)
        sh = gc.del_spreadsheet(oldSpreadsheetId)
        sh = gc.create(fname[0], folder_id)
        content = open(filename, "r").read().encode("utf-8")
        gc.import_csv(sh.id, content)
When this script is run, the above flow is run.
Note:
In this modification, the CSV file is uploaded as a Google Spreadsheet. From your question, I thought that this might be your expected result. But if you want to upload the CSV file as a plain CSV file, please remove "mimeType": "application/vnd.google-apps.spreadsheet", from file_metadata (see the sketch after these notes).
If an error related to the scopes occurs, please add the scope https://www.googleapis.com/auth/drive, authorize the scopes again, and test it again.
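For reference, a minimal sketch of that first note, reusing filename, filePath, folder_id, MediaFileUpload and service from the modified script above; only the mimeType key is removed so the file is kept as a plain CSV:
# Without the Spreadsheet mimeType in the metadata, Drive keeps the uploaded
# media as a plain CSV file instead of converting it to a Google Spreadsheet.
file_metadata = {
    "name": filename,
    "parents": [folder_id],
}
media = MediaFileUpload(filePath + "/" + filename + ".csv")
file = service.files().create(body=file_metadata, media_body=media, fields="id").execute()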
Reference:
Upload file data

Unzip Password Protected Zip file automatically from azure storage?

I'm just wondering whether there is a way to extract a password-protected zip file from Azure Storage.
I tried using a Python Azure Function, to no avail, because I had a problem reading the location of the file.
Would the file have to be stored in a shared location temporarily in order to achieve this?
Just looking for a bit of direction here; am I missing a step maybe?
Regards,
James
Azure Blob Storage provides storage functionality only; there is no runtime environment in it to perform an unzip operation. So basically, we should download the .zip file to an Azure Function, unzip it, and upload the files inside the .zip one by one.
For a quick test, I wrote an HTTP-trigger Azure Function demo that unzips a password-protected zip file; it works for me locally:
import azure.functions as func
import uuid
import os
import shutil
from azure.storage.blob import ContainerClient
from zipfile import ZipFile

storageAccountConnstr = '<storage account conn str>'
container = '<container name>'

# define local temp paths; on Azure, the path is recommended to be under /home
tempPathRoot = 'd:/temp/'
unZipTempPathRoot = 'd:/unZipTemp/'

def main(req: func.HttpRequest) -> func.HttpResponse:
    reqBody = req.get_json()
    fileName = reqBody['fileName']
    zipPass = reqBody['password']
    container_client = ContainerClient.from_connection_string(storageAccountConnstr, container)

    # download zip file
    zipFilePath = tempPathRoot + fileName
    with open(zipFilePath, "wb") as my_blob:
        download_stream = container_client.get_blob_client(fileName).download_blob()
        my_blob.write(download_stream.readall())

    # unzip to temp folder
    unZipTempPath = unZipTempPathRoot + str(uuid.uuid4())
    with ZipFile(zipFilePath) as zf:
        zf.extractall(path=unZipTempPath, pwd=bytes(zipPass, 'utf8'))

    # upload all files in temp folder
    for root, dirs, files in os.walk(unZipTempPath):
        for file in files:
            filePath = os.path.join(root, file)
            destBlobClient = container_client.get_blob_client(fileName + filePath.replace(unZipTempPath, ''))
            with open(filePath, "rb") as data:
                destBlobClient.upload_blob(data, overwrite=True)

    # remove all temp files
    shutil.rmtree(unZipTempPath)
    os.remove(zipFilePath)

    return func.HttpResponse("done")
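For a local test, a minimal sketch of calling this demo (the function name and port are assumptions based on the default local Functions host; the JSON keys match the req.get_json() fields above):
import requests
# Hypothetical local endpoint; adjust the host, port and function name to your project.
url = "http://localhost:7071/api/unzipfunc"
payload = {
    "fileName": "protected.zip",   # name of the blob in the container
    "password": "my-zip-password"  # password of the zip file
}
resp = requests.post(url, json=payload)
print(resp.text)  # the demo returns "done" on success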
Using a blob trigger would be better for this, as an HTTP trigger will hit time-out errors if your zip file is huge.
Anyway, this is only a demo that shows you how to do this.
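As a rough sketch of that blob-trigger suggestion (a minimal sketch only; the binding names and container path are assumptions that would be declared in function.json, and the unzip-and-reupload logic is the same as in the HTTP demo above):
import azure.functions as func
# Assumed function.json binding, e.g.:
#   {"type": "blobTrigger", "name": "inputBlob",
#    "path": "zips/{name}", "connection": "AzureWebJobsStorage"}
def main(inputBlob: func.InputStream):
    # inputBlob.name is the blob path and inputBlob.read() returns its bytes;
    # from here, write them to a temp file and run the same unzip/upload steps.
    data = inputBlob.read()
    print("Triggered by blob: {}, {} bytes".format(inputBlob.name, len(data)))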

How to work with Google Colab efficiently?

I am trying to train a neural network on Colab using a GPU there. I am now wondering if I am on the right path and if all the steps I am doing are necessary, because the process I am following does not seem very efficient to me.
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
import os
# choose a local (colab) directory to store the data.
local_root_path = os.path.expanduser("~/data")
try:
    os.makedirs(local_root_path)
except: pass

def ListFolder(google_drive_id, destination):
    file_list = drive.ListFile({'q': "'%s' in parents and trashed=false" % google_drive_id}).GetList()
    counter = 0
    for f in file_list:
        # If it is a directory, create the directory and fetch the files inside it
        if f['mimeType'] == 'application/vnd.google-apps.folder':
            folder_path = os.path.join(destination, f['title'])
            os.makedirs(folder_path)
            print('creating directory {}'.format(folder_path))
            ListFolder(f['id'], folder_path)
        else:
            fname = os.path.join(destination, f['title'])
            f_ = drive.CreateFile({'id': f['id']})
            f_.GetContentFile(fname)
            counter += 1
    print('{} files were downloaded to {}'.format(counter, destination))

ListFolder("1s1Ks_Gf_cW-F-RwXFjBu96svbmqiXB0o", local_root_path)
These commands connect the notebook in Colab with my Google Drive and store the data in Colab. Because I have a lot of images (more than 180k), storing the data in Colab takes very, very long, and the connection sometimes breaks. I am now wondering whether it is necessary to upload all the data from my Google Drive to Colab.
If not, what do I have to do instead to work with the data from Google Drive?
If yes, is there a way to do this more efficiently?
Or is there maybe a completely different way I should work with Colab?
You can access files directly on your Google Drive without copying them into the notebook environment.
Execute this code in one cell:
from google.colab import drive
drive.mount('/content/gdrive')
And try:
!ls /content/gdrive
Now you can copy your files from/to the /content/gdrive directory, and they will appear in your Google Drive.
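For example, a minimal sketch of reading image files in place from the mounted folder (the data/images path is a hypothetical folder inside My Drive; adjust it to your own layout):
import os
image_dir = '/content/gdrive/My Drive/data/images'  # hypothetical folder in your Drive
# Iterate over the images directly from Drive, without copying them into the Colab VM.
for fname in os.listdir(image_dir):
    fpath = os.path.join(image_dir, fname)
    # e.g. pass fpath to your data loader, such as PIL.Image.open(fpath)
    print(fpath)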

google drive api to upload all pdfs to google drive

I am using PyDrive to upload PDF files to my Google Drive folder. I want to send all *.pdf files in a local folder at once with this code, but I'm not sure where to go from here. Should I use glob? If so, I would like to see an example, please.
Working code that sends one file to the designated Google Drive folder:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
folder_id = 'google_drive_id_goes_here'
f = drive.CreateFile({'title': 'testing_pdf',
                      'mimeType': 'application/pdf',
                      'parents': [{'kind': 'drive#fileLink', 'id': folder_id}]})
f.SetContentFile('/Users/Documents/python/google_drive/testing.pdf')
f.Upload()
You can't upload the files all at once. Creating a file with the API is a single operation, and PyDrive has no mechanism for uploading more than one.
You're going to have to put this in a loop and upload each file as you go.
import os

directory = 'the/directory/you/want/to/use'
for filename in os.listdir(directory):
    if filename.endswith(".txt"):
        # open the file relative to the directory being scanned
        f = open(os.path.join(directory, filename))
        lines = f.read()
        print(lines[10])
        continue
    else:
        continue
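A hedged sketch of combining that loop with the PyDrive upload from the question, using glob to keep only the PDFs (directory and folder_id are placeholders, as in the question's code):
import glob
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

folder_id = 'google_drive_id_goes_here'  # placeholder Google Drive folder ID
directory = '/Users/Documents/python/google_drive'  # placeholder local folder

# Upload every PDF in the local folder, one file per iteration.
for filepath in glob.glob(os.path.join(directory, '*.pdf')):
    title = os.path.basename(filepath)
    f = drive.CreateFile({'title': title,
                          'mimeType': 'application/pdf',
                          'parents': [{'kind': 'drive#fileLink', 'id': folder_id}]})
    f.SetContentFile(filepath)
    f.Upload()
    print('Uploaded: {}'.format(title))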

How to upload dataset in google colaboratory?

I need to upload a dataset of images to Google Colaboratory. It has subfolders inside it which contain images. Whatever I found on the net was for a single file.
from google.colab import files
uploaded = files.upload()
Is there any way to do it?
For uploading data to Colab, you have several methods.
Method 1
You can directly upload file or directory in Colab UI
The data is saved on the Colab local machine. In my experiment, there are three characteristics:
1) the upload speed is good.
2) it keeps the directory structure, but it does not unzip automatically. You need to execute this code in a Colab cell:
!mkdir {dir_name}
!unzip {zip_file} -d {dir_name}
3) Most importantly, when Colab crashes, the data will be deleted.
Method 2
Execute the code in Colab cell:
from google.colab import files
uploaded = files.upload()
In my experiment, when you run the cell, an upload button appears, and while the cell execution indicator is still running, you choose a file. 1) After execution, the file name will appear in the result panel. 2) Refresh the Colab file panel and you will see the file. 3) Or execute !ls and you should see your file. If not, the file was not uploaded successfully.
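For reference, files.upload() returns a dict that maps each chosen filename to its raw bytes, so a minimal sketch of inspecting the result could look like this (the files are also written to the current working directory of the Colab VM):
from google.colab import files
uploaded = files.upload()  # shows the upload button in the cell output
# Print the name and size of everything that was uploaded.
for name, data in uploaded.items():
    print('{}: {} bytes'.format(name, len(data)))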
Method 3
If your data is from Kaggle, you can use the Kaggle API to download the data to a Colab local directory.
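A hedged sketch of that Kaggle route (the dataset slug owner/dataset-name is a placeholder copied from a Kaggle dataset page, and it assumes you have downloaded your kaggle.json API token):
!pip install -q kaggle
from google.colab import files
files.upload()  # choose your kaggle.json token
!mkdir -p ~/.kaggle && cp kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json
# Download and unzip the dataset into /content/data.
!kaggle datasets download -d owner/dataset-name -p /content/data --unzip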
Method 4
Upload data to Google Drive; you can use 1) the Google Drive web UI or 2) the Drive API (https://developers.google.com/drive/api/v3/quickstart/python). To access the Drive data, use the following code in Colab.
from google.colab import drive
drive.mount('/content/drive')
I would recommend uploading data to Google Drive because it is permanent.
You need to copy your dataset into Google Drive. Then obtain the DATA_FOLDER_ID.
The best way to do so is to open the folder in your Google Drive and copy the last part of the URL. For example, the folder ID for the link
https://drive.google.com/drive/folders/xxxxxxxxxxxxxxxxxxxxxxxx is xxxxxxxxxxxxxxxxxxxxxxxx.
Then you can create local folders and download each file recursively.
DATA_FOLDER_ID = 'xxxxxxxxxxxxxxxxxxxxxxxx'
ROOT_PATH = '~/you_path'
!pip install -U -q PyDrive
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
# choose a local (colab) directory to store the data.
local_root_path = os.path.expanduser(ROOT_PATH)
try:
    os.makedirs(local_root_path)
except: pass

def ListFolder(google_drive_id, destination):
    file_list = drive.ListFile({'q': "'%s' in parents and trashed=false" % google_drive_id}).GetList()
    counter = 0
    for f in file_list:
        # If it is a directory, create the directory and fetch the files inside it
        if f['mimeType'] == 'application/vnd.google-apps.folder':
            folder_path = os.path.join(destination, f['title'])
            os.makedirs(folder_path)
            print('creating directory {}'.format(folder_path))
            ListFolder(f['id'], folder_path)
        else:
            fname = os.path.join(destination, f['title'])
            f_ = drive.CreateFile({'id': f['id']})
            f_.GetContentFile(fname)
            counter += 1
    print('{} files were downloaded to {}'.format(counter, destination))

ListFolder(DATA_FOLDER_ID, local_root_path)
