I have about 200k files with the .tif extension in the root folder of my Google Drive that I need to delete.
The Python code I wrote only moves/deletes the few files that are visible at a time (you need to scroll down in Drive and let more of them 'load' to see the rest).
I am willing to delete all other files as well if there is a shortcut to do so.
Ctrl + A does not work either; it just selects the same few files that are currently visible.
import shutil
import os

source = '/content/gdrive/My Drive'
dest1 = '/content/gdrive/My Drive/toDelete'

files = os.listdir(source)
for f in files:
    if f.endswith(".tif"):
        # join with the source directory so the move works regardless of the
        # current working directory
        shutil.move(os.path.join(source, f), dest1)

dir_name = "/content/gdrive/My Drive"
test = os.listdir(dir_name)
for item in test:
    if item.endswith(".tif"):
        os.remove(os.path.join(dir_name, item))
First you need to search for all the files whose names contain .tif and that are in your root directory; once you have those, you can start deleting them.
I recommend you test this without the delete first, to make sure it's listing the files you're after. I am not responsible for this deleting stuff :)
# drive_service is assumed to be an authorized Drive v3 service object
page_token = None
while True:
    response = drive_service.files().list(q="name contains '.tif' and 'root' in parents",
                                          spaces='drive',
                                          fields='nextPageToken, files(id, name)',
                                          pageToken=page_token).execute()
    for file in response.get('files', []):
        # Process change
        print('Found file: %s (%s)' % (file.get('name'), file.get('id')))
        #drive_service.files().delete(fileId=file.get('id')).execute()
    page_token = response.get('nextPageToken', None)
    if page_token is None:
        break
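One caution: files().delete bypasses the trash, so the deletion is permanent. A gentler variant (a sketch, assuming the same drive_service and query as above) is to move each match to the trash with files().update, which you can still undo from the Drive UI:

page_token = None
while True:
    response = drive_service.files().list(q="name contains '.tif' and 'root' in parents",
                                          spaces='drive',
                                          fields='nextPageToken, files(id, name)',
                                          pageToken=page_token).execute()
    for file in response.get('files', []):
        # Trash instead of hard-deleting; recoverable from the Drive trash.
        drive_service.files().update(fileId=file.get('id'),
                                     body={'trashed': True}).execute()
    page_token = response.get('nextPageToken', None)
    if page_token is None:
        break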
Use glob.
Use pathlib for path manipulation.
import pathlib
import shutil

source = pathlib.Path('/content/gdrive/My Drive')
dest1 = pathlib.Path('/content/gdrive/My Drive/toDelete')
dest1.mkdir(exist_ok=True)

for f in source.glob("*.tif"):
    shutil.move(f, dest1.joinpath(f.name))
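If some of the .tif files also sit in subfolders (an assumption; the question mentions only the root), rglob searches recursively:

# rglob("*.tif") matches .tif files at any depth below source; note that
# files with the same name in different subfolders will collide in dest1.
for f in source.rglob("*.tif"):
    shutil.move(f, dest1.joinpath(f.name))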
I'm successfully able to download CSV files from a folder on Exavault, using the files provided by Exavault, but the download gets saved in a temporary folder on my Mac.
How do I define the directory to save it to? This is an excerpt of the script.
import sys

resources_api = ResourcesApi()
resources_api.api_client.configuration.host = ACCOUNT_URL

try:
    list_result = resources_api.list_resources(
        API_KEY, ACCESS_TOKEN, "/files", offset=0, type='file', name='*.csv')
    if list_result.returned_results == 0:
        print("Found no files to download")
        sys.exit(0)
    else:
        print("Found {} CSV files to download".format(list_result.returned_results))
except Exception as e:
    print('Exception when calling Api:', str(e))
    sys.exit(1)
downloads = []
listed_files = list_result.data
for listed_file in listed_files:
    downloads.append("id:{}".format(listed_file.id))
    print(listed_file.attributes.path)

try:
    downloaded_file = resources_api.download(API_KEY, ACCESS_TOKEN, downloads,
                                             download_archive_name="sample-csvs")
    # download() returns the path of the file it wrote
    print("File(s) downloaded to", downloaded_file)
except Exception as e:
    print('Exception when calling Api:', str(e))
    sys.exit(1)
Separately there's a resources_api.py file, which might hold the answer, but if I edit that, I'm not sure how I would invoke the changes.
Any help would be appreciated.
When looking at the code of the API, you can see that it is written such that it always creates a temporary folder for you:
# Write the file to the temporary folder configured for our client
fd, path = tempfile.mkstemp(dir=self.api_client.configuration.temp_folder_path)
You could check whether changing api_client.configuration.temp_folder_path works for you.
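For example (a sketch, assuming the configuration object quoted above exposes temp_folder_path as a writable attribute):

# Point the client's temp folder at a directory of your choice before calling
# download(); mkstemp(dir=...) will then create the file there instead.
resources_api.api_client.configuration.temp_folder_path = "/Users/you/Downloads"  # hypothetical path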
Or even better, just copy the file to a location of your choice. You can do it with shutil:

import shutil
import os

targetFolder = "/your/folder/"
filename = os.path.basename(downloaded_file)
destination = os.path.join(targetFolder, filename)
shutil.copy(downloaded_file, destination)
I have this script, made with big help from @Tanaike. It does two things:
Deletes old files in the Google Drive folder by listing the files in my local folder (CSVtoGD), checking their names, and deleting the files in Google Drive that are named the same.
Uploads new CSV files to Google Drive.
I have a problem with deleting old files in Google Drive. The script uploads new files and deletes old files in GD, but if there is a new file in my local folder (CSVtoGD) which has never been uploaded to Google Drive, I receive this error:
HttpError: <HttpError 404 when requesting https://www.googleapis.com/upload/drive/v3/files?fields=id&alt=json&uploadType=multipart returned "File not found: 19vrbvaeDqWcxFGwPV82APWYTmBMEn-hi.". Details: "[{'domain': 'global', 'reason': 'notFound', 'message': 'File not found: 19vrbvaeDqWcxFGwPV82APWYTmBMEn-hi.', 'locationType': 'parameter', 'location': 'fileId'}]">
Script:
import gspread
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

gc = gspread.oauth(credentials_filename='/users/user/credentials.json')
service = build("drive", "v3", credentials=gc.auth)
folder_id = '19vrbvaeDqWcxFGwPV82APWYTmBME'

def getSpreadsheetId(filename, filePath):
    q = "name='" + filename + "' and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false"
    res = service.files().list(q=q, fields="files(id)", corpora="allDrives", includeItemsFromAllDrives=True, supportsAllDrives=True).execute()
    items = res.get("files", [])
    if not items:
        print("No files found.")
        file_metadata = {
            "name": filename,
            "parents": [folder_id],
            "mimeType": "application/vnd.google-apps.spreadsheet",
        }
        media = MediaFileUpload(filePath + "/" + filename + ".csv")
        file = service.files().create(body=file_metadata, media_body=media, fields="id").execute()
        id = file.get("id")
        print("File was uploaded. The file ID is " + id)
        exit()
    return items[0]["id"]

filePath = '/users/user/CSVtoGD'
os.chdir(filePath)
files = os.listdir()
for filename in files:
    fname = filename.split(".")
    if fname[1] == "csv":
        oldSpreadsheetId = getSpreadsheetId(fname[0], filePath)
        print(oldSpreadsheetId)
        sh = gc.del_spreadsheet(oldSpreadsheetId)
        sh = gc.create(fname[0], folder_id)
        content = open(filename, "r").read().encode("utf-8")
        gc.import_csv(sh.id, content)
I was trying to make it work, but none of my efforts worked, so I think I have to change the script to simply delete all files in my Google Drive folder and then upload the new files. But reading the Google API docs I really can't see where to start, since the files need to be deleted by checking their ID in Google Drive, so I have to somehow list the file IDs and then delete all of them.
Simply put: how do I delete ALL files in a Google Drive folder and then upload new files from the local folder (CSVtoGD)? The uploading part of the script works great; I have a problem with deleting the old files.
I believe your goal is as follows.
After all files in the folder are deleted, you want to upload the files.
From the script you showed, you want to delete the Google Spreadsheet files in the folder.
In this case, how about the following modification?
Modified script:
import gspread
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

gc = gspread.oauth(credentials_filename='/users/user/credentials.json')
service = build("drive", "v3", credentials=gc.auth)
folder_id = '19vrbvaeDqWcxFGwPV82APWYTmBME'

def deleteAllFilesInFolder():
    q = "'" + folder_id + "' in parents and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false"
    res = service.files().list(q=q, fields="files(id)", corpora="allDrives", includeItemsFromAllDrives=True, supportsAllDrives=True, pageSize=1000).execute()
    for e in res.get("files", []):
        gc.del_spreadsheet(e["id"])

def uploadCSV(filename, filePath):
    file_metadata = {
        "name": filename,
        "parents": [folder_id],
        "mimeType": "application/vnd.google-apps.spreadsheet",
    }
    media = MediaFileUpload(filePath + "/" + filename + ".csv")
    file = service.files().create(body=file_metadata, media_body=media, fields="id", supportsAllDrives=True).execute()
    id = file.get("id")
    print("File was uploaded. The file ID is " + id)

filePath = '/users/user/CSVtoGD'
os.chdir(filePath)
files = os.listdir()
delete = False
for filename in files:
    fname = filename.split(".")
    if fname[1] == "csv":
        if not delete:
            deleteAllFilesInFolder()
            delete = True
        uploadCSV(fname[0], filePath)
When this script is run, first, all Spreadsheet files in the folder are deleted. Then, the CSV files are uploaded as Google Spreadsheets.
Note:
In this modified script, all Spreadsheet files in the folder are completely deleted. Please be careful about this. So I would recommend testing first with sample Spreadsheet files.
If you want to delete all files in the folder, not only Spreadsheets, please modify q = "'" + folder_id + "' in parents and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false" to q = "'" + folder_id + "' in parents and trashed=false".
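A sketch of that broadened variant (assuming the same service and folder_id; gc.del_spreadsheet is aimed at Spreadsheets, so the generic files().delete endpoint is used here instead):

def deleteAllFilesInFolder():
    # No mimeType filter: this matches every non-trashed item in the folder,
    # including subfolders, so narrow the query if that is not intended.
    q = "'" + folder_id + "' in parents and trashed=false"
    res = service.files().list(q=q, fields="files(id)", corpora="allDrives",
                               includeItemsFromAllDrives=True, supportsAllDrives=True,
                               pageSize=1000).execute()
    for e in res.get("files", []):
        service.files().delete(fileId=e["id"], supportsAllDrives=True).execute()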
I have a problem which was hard to express in the title. I have this script, written with a lot of help from @Tanaike. This script basically does two things:
Deletes files from the Google Drive folder by the filenames which are in the local folder CSVtoGD (using spreadsheet IDs)
then:
Uploads the list of CSVs from the local folder "CSVtoGD" to the Google Drive folder
I have a big problem now and cannot work it out. The script deletes old files in Google Drive when the same filenames are in CSVtoGD. When I add a new file to the local folder CSVtoGD, I get a "list index out of range" error and "No files found" is printed, as written in the script. I tried to make some modifications, but they were blind shots. What I want this script to do is delete from the Google Drive folder ONLY the files which are in the local CSVtoGD folder, and simply upload the rest of the files in CSVtoGD. Does anyone have an answer for this? Thank you :)
import gspread
import os
from googleapiclient.discovery import build

gc = gspread.oauth(credentials_filename='/users/user/credentials.json')
service = build("drive", "v3", credentials=gc.auth)

def getSpreadsheetId(filename):
    q = "name='" + filename + "' and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false"
    res = service.files().list(q=q, fields="files(id)", corpora="allDrives", includeItemsFromAllDrives=True, supportsAllDrives=True).execute()
    items = res.get("files", [])
    if not items:
        print("No files found.")
        exit()
    return items[0]["id"]

os.chdir('/users/user/CSVtoGD2')
files = os.listdir()
for filename in files:
    fname = filename.split(".")
    if fname[1] == "csv":
        folder_id = '1z_pUvZyt5AoTNy-aKCKLmlNjdR2OPo'
        oldSpreadsheetId = getSpreadsheetId(fname[0])
        #print(oldSpreadsheetId)
        sh = gc.del_spreadsheet(oldSpreadsheetId)
        # If the same filenames are in the CSVtoGD folder on my Mac
        # and in the Google Drive folder, these lines work well.
        # The problem is when there are new files in the local CSVtoGD folder.
        sh = gc.create(fname[0], folder_id)
        content = open(filename, "r").read().encode("utf-8")
        gc.import_csv(sh.id, content)
I believe your goal is as follows.
For example, when sample.csv exists on your local PC and a Spreadsheet named sample exists in your Google Drive, you want to delete the Spreadsheet named sample from your Google Drive.
When sample1.csv exists on your local PC and a Spreadsheet named sample1 does NOT exist in your Google Drive, you want to upload sample1.csv to Google Drive.
In this case, how about the following modification?
Modified script:
import gspread
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

gc = gspread.oauth(credentials_filename='/users/user/credentials.json')
service = build("drive", "v3", credentials=gc.auth)
folder_id = '1z_pUvZyt5AoTNy-aKCKLmlNjdR2OPo'  # Please set the ID of the folder you want to upload the file to.

def getSpreadsheetId(filename, filePath):
    q = "name='" + filename + "' and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false"
    res = service.files().list(q=q, fields="files(id)", corpora="allDrives", includeItemsFromAllDrives=True, supportsAllDrives=True).execute()
    items = res.get("files", [])
    if not items:
        print("No files found.")
        file_metadata = {
            "name": filename,
            "parents": [folder_id],
            "mimeType": "application/vnd.google-apps.spreadsheet",
        }
        media = MediaFileUpload(filePath + "/" + filename + ".csv")
        file = service.files().create(body=file_metadata, media_body=media, fields="id").execute()
        id = file.get("id")
        print("File was uploaded. The file ID is " + id)
        exit()
    return items[0]["id"]

filePath = '/users/user/CSVtoGD2'
os.chdir(filePath)
files = os.listdir()
for filename in files:
    fname = filename.split(".")
    if fname[1] == "csv":
        oldSpreadsheetId = getSpreadsheetId(fname[0], filePath)
        print(oldSpreadsheetId)
        sh = gc.del_spreadsheet(oldSpreadsheetId)
        sh = gc.create(fname[0], folder_id)
        content = open(filename, "r").read().encode("utf-8")
        gc.import_csv(sh.id, content)
When this script is run, the flow above is executed.
Note:
In this modification, the CSV file is uploaded as a Google Spreadsheet. From your question, I thought this might be your expected result. But if you want to upload the CSV file as a plain CSV file, please remove "mimeType": "application/vnd.google-apps.spreadsheet", from file_metadata.
If you get an error related to the scopes, please add the scope of https://www.googleapis.com/auth/drive, authorize the scopes again, and test again.
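For the first note, a sketch of the same file_metadata with the conversion removed (Drive then stores the upload as a plain CSV file):

file_metadata = {
    "name": filename,
    "parents": [folder_id],
    # No "mimeType" key: the CSV is stored as-is instead of being
    # converted to a Google Spreadsheet on upload.
}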
Reference:
Upload file data
I only know how to write Python for GIS purposes. There is more to this code using arcpy and geoprocessing tools; this is just the beginning part I'm stuck on, which gets the data ready so I can then use the shapefiles within the zipped folder for the rest of my script.
I am trying to prompt the user to enter a directory to search through. This script will search it for compressed zip files, then extract all the files to that same directory.
import zipfile, os

# ask what directory to search in
mypath = input("Enter .zip folder path: ")
extension = ".zip"
os.chdir(mypath)  # change directory from working dir to dir with files

for item in os.listdir(mypath):
    if item.endswith(extension):
        filename = os.path.abspath(item)
        zip_ref = zipfile.ZipFile(filename)
        zip_ref.extractall(mypath)
        zip_ref.close()
I tried y'all's suggestions and still have issues with the following:
import zipfile, os

mypath = input("Enter folder: ")
if os.path.isdir(mypath):
    # os.walk (not os.listdir) yields (dirpath, dirnames, filenames) tuples
    for dirpath, dirnames, filenames in os.walk(mypath):
        for file in filenames:
            if file.endswith(".zip"):
                full_path = os.path.join(dirpath, file)
                print(full_path)
                with zipfile.ZipFile(full_path) as z:
                    z.extractall(mypath)
else:
    print("Directory does not exist.")
I'm not sure about the use of arcpy. However...
To iterate over entries in a directory, use os.listdir:
for entry_name in os.listdir(directory_path):
# ...
Inside the loop, entry_name will be the name of an item in the directory at directory_path.
When checking if it ends with ".zip", keep in mind that the comparison is case-sensitive. You can use str.lower to effectively ignore case when using str.endswith:
if entry_name.lower().endswith('.zip'):
# ...
To get the full path to the entry (in this case, your .zip), use os.path.join:
entry_path = os.path.join(directory_path, entry_name)
Pass this full path to zipfile.ZipFile.
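Putting those pieces together, a minimal sketch (assuming the directory path is read with input() as in your script):

import os
import zipfile

directory_path = input("Enter .zip folder path: ")

for entry_name in os.listdir(directory_path):
    # Case-insensitive extension check.
    if entry_name.lower().endswith('.zip'):
        entry_path = os.path.join(directory_path, entry_name)
        # Extract into the same directory the archive lives in.
        with zipfile.ZipFile(entry_path) as zip_ref:
            zip_ref.extractall(directory_path)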
Your first if-block will negate the else-block if the location is invalid. I'd remove the else entirely; if you keep it, the if-check effectively kills the program. The "if folderExist" check is sufficient to replace the else.
import arcpy, zipfile, os

# ask what directory to search in
folder = input("Where is the directory? ")
# set workspace as a variable so the location can be changed
arcpy.env.workspace = folder
# check for an invalid entry - if bad, ask to input a different location
if len(folder) == 0:
    print("Invalid location.")
    new_folder = input("Try another directory? ")
    folder = new_folder  # replace the old location and reset it as the directory location
    arcpy.env.workspace = folder
# check to see if the folder exists
folderExist = arcpy.Exists(folder)
if folderExist:
    # loop through files in the directory
    for item in os.listdir(folder):
        # check for .zip extension
        if item.endswith(".zip"):
            file_name = os.path.join(folder, item)  # get the full path of the file
            print(file_name)
            zip_ref = zipfile.ZipFile(file_name)  # create zipfile object
            zip_ref.extractall(folder)  # extract all to directory
            zip_ref.close()  # close file
This may be neater if you're okay not using your original code:
import zipfile, os
from tkinter import filedialog as fd

# ask what directory to search in
folder = fd.askdirectory(title="Where is the directory?")
# loop through files in directory
for item in os.listdir(folder):
    file_name = os.path.join(folder, item)  # build the full path, since item is just a name
    # check that the entry really is a zip archive
    if zipfile.is_zipfile(file_name):
        print(file_name)
        zip_ref = zipfile.ZipFile(file_name)  # create zipfile object
        zip_ref.extractall(folder)  # extract all to directory
        zip_ref.close()  # close file
[UPDATED]
I was able to solve this with the following code:
import os
import zipfile

mypath = input('Enter Folder: ')
if os.path.isdir(mypath):
    for file in os.listdir(mypath):
        if file.endswith('.zip'):
            with zipfile.ZipFile(os.path.join(mypath, file)) as z:
                z.extractall(mypath)
else:
    print('Directory does not exist')
I have been working on this challenge for about a day. I've looked at multiple questions and answers on SO and tried to 'MacGyver' the code I found for my purpose, but I'm still having issues.
I have a directory (let's call it "src\") with hundreds of files (.txt and .xml). Each .txt file has an associated .xml file (let's call them a pair). Example:
src\text-001.txt
src\text-001.xml
src\text-002.txt
src\text-002.xml
src\text-003.txt
src\text-003.xml
Here's an example of how I would like it to turn out, with each pair of files placed into a single unique folder:
src\text-001\text-001.txt
src\text-001\text-001.xml
src\text-002\text-002.txt
src\text-002\text-002.xml
src\text-003\text-003.txt
src\text-003\text-003.xml
What I'd like to do is create an associated folder for each pair and then move each pair of files into its respective folder using Python. I've already tried working from code I found (thanks to a Nov '12 post by Sethdd), but am having trouble figuring out how to use the move function to grab pairs of files. Here's where I'm at:
import os
import shutil

srcpath = "PATH_TO_SOURCE"
srcfiles = os.listdir(srcpath)
destpath = "PATH_TO_DEST"

# grabs the name of the file before the extension and uses it as the dest folder name
destdirs = list(set([filename[0:9] for filename in srcfiles]))

def create(dirname, destpath):
    full_path = os.path.join(destpath, dirname)
    os.mkdir(full_path)
    return full_path

def move(filename, dirpath):
    shutil.move(os.path.join(srcpath, filename), dirpath)

# create destination directories and store their names along with full paths
targets = [
    (folder, create(folder, destpath)) for folder in destdirs
]

for dirname, full_path in targets:
    for filename in srcfiles:  # fixed: was 'srcfile', a NameError
        if dirname == filename[0:9]:
            move(filename, full_path)
I feel like it should be easy, but Python isn't something I work with every day, and it's been a while since my scripting days... Any help would be greatly appreciated!
Thanks,
WK2EcoD
Use the glob module to iterate over all of the .txt files. From that you can parse out the names, create the folders, and move the files.
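A minimal sketch of that approach (srcpath is the placeholder from the question's script; the pair is moved rather than copied, per the question):

import glob
import os
import shutil

srcpath = "PATH_TO_SOURCE"  # placeholder, as in the question

for txt_path in glob.glob(os.path.join(srcpath, "*.txt")):
    # The folder name is the file name without its extension, e.g. text-001.
    base = os.path.splitext(os.path.basename(txt_path))[0]
    pair_dir = os.path.join(srcpath, base)
    os.makedirs(pair_dir, exist_ok=True)
    shutil.move(txt_path, pair_dir)
    # Move the matching .xml alongside, if it exists.
    xml_path = os.path.join(srcpath, base + ".xml")
    if os.path.exists(xml_path):
        shutil.move(xml_path, pair_dir)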
The process should be as simple as it appears to you as a human.
for file_name in os.listdir(srcpath):
    dir_path = os.path.join(srcpath, file_name[:9])
    # if dir doesn't exist, create it
    os.makedirs(dir_path, exist_ok=True)
    # move file_name into dir
    shutil.move(os.path.join(srcpath, file_name), dir_path)
You're doing a lot of intermediate work that seems to be confusing you.
Also, insert some simple print statements to track data flow and execution flow. It appears that you have no tracing output so far.
You can do it with the os module. For every file in the directory, check if the associated folder exists, create it if needed, and then move the file. See the code below:
import os

SRC = 'path-to-src'

for fname in os.listdir(SRC):
    filename, file_extension = os.path.splitext(fname)
    # splitext keeps the leading dot, so compare against '.xml' / '.txt'
    if file_extension not in ['.xml', '.txt']:
        continue
    folder_path = os.path.join(SRC, filename)
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)  # fixed: was 'folderpath', a NameError
    os.rename(
        os.path.join(SRC, fname),
        os.path.join(folder_path, fname)
    )
My approach would be:
Find the pairs that I want to move (do nothing with files without a pair)
Create a directory for every pair
Move the pair to the directory
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import os, shutil
import re

def getPairs(files):
    pairs = []
    file_re = re.compile(r'^(.*)\.(.*)$')
    for f in files:
        match = file_re.match(f)
        if match:
            (name, ext) = match.groups()
            if ext == 'txt' and name + '.xml' in files:
                pairs.append(name)
    return pairs

def movePairsToDir(pairs):
    for name in pairs:
        os.mkdir(name)
        shutil.move(name + '.txt', name)
        shutil.move(name + '.xml', name)

files = os.listdir()
pairs = getPairs(files)
movePairsToDir(pairs)
NOTE: This script works when called inside the directory with the pairs.
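If you would rather point the script at a directory explicitly, a small sketch (the sys.argv handling is an addition, not part of the original):

import os
import sys

# e.g. `python move_pairs.py /path/to/src` (the script name is hypothetical)
if len(sys.argv) > 1:
    os.chdir(sys.argv[1])  # switch into the pairs directory before processing

files = os.listdir()
pairs = getPairs(files)
movePairsToDir(pairs)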