Python file sort - python

I wrote this to help me sort files that live in a bunch of different folders. It is meant to take the first file from each folder and create a folder for them, then do the same with the second file from each folder, and so on. But whenever I run the code, nothing happens. Can someone help?
import os, sys
# Script from the question: lists `path`, then tries to regroup the entries
# into numbered folders. The notes below only describe what the code shows.
path = "\Users\mac\Desktop\soliddd sort\dir"
fdir = os.listdir(path)
# f is the length of the first entry's *name* (a string), not a file count.
f = len(fdir[0])
# One empty list per character of that name; xrange is Python 2 only.
array = [[] for i in xrange(f)]
def setArray(c, i, j):
# Assigns by index into array[j], which starts out as an empty list.
array[j][i] = c[j]
def chooseFile(j):
# Iterating fdir yields entry names (strings), so fdir[i] indexes a list
# with a string.
for i in fdir:
setArray(fdir[i], i, j)
def makedir(f, fdir):
# f is an integer (see above), so it cannot be iterated directly.
for i in f:
folder = r"\Users\mac\Desktop\soliddd sort\dir"+str(i)
if not os.path.exists(folder):
os.makedirs(folder)
for j in fdir:
# 'wb' is passed to os.path.join here rather than to open().
with open(os.path.join(folder, array[i][j], 'wb')) as temp:
# `buff` is not defined anywhere in this snippet.
temp.write(buff)
# `folder` is a string path, which has no close() method.
folder.close()
def main():
for j in f:
chooseFile(j)
makedir(f, fdir)
# NOTE(review): main() is defined but never called in this snippet, which is
# consistent with the reported "nothing happens".

A tiny example on how you can go about sorting files into folders, as I couldn't understand why you were randomly selecting files from different locations. This example gets all the file names, and then lets you select a common filename to sort into a new folder
def sort_files(base_dir=r"C:\sorted_files", folder_list=None, key=None):
    """Move files whose name contains ``key`` into ``base_dir/key``.

    Args:
        base_dir: Directory under which the ``key`` sub-folder is created.
        folder_list: Folders to collect files from. Defaults to the two
            placeholder folders of the original example.
        key: Text to look for in each file name. When omitted, the user is
            prompted for it.
    """
    import os
    import shutil

    if folder_list is None:
        folder_list = [
            'folder_path_a',
            'folder_path_b',
        ]

    file_list = []
    for folder in folder_list:
        file_list.extend(
            os.path.join(folder, file_name) for file_name in os.listdir(folder)
        )

    if key is None:
        try:
            prompt = raw_input  # Python 2
        except NameError:
            prompt = input  # Python 3
        key = prompt("Group files with <text> in the filename: ")

    # Match against the file name only; testing the joined path would also
    # select every file that merely sits in a folder whose name contains key.
    matching_files = [
        file_name for file_name in file_list
        if key in os.path.basename(file_name)
    ]

    # shutil.move does not create missing parent folders, so make the target
    # folder first (only when there is something to move).
    target_dir = os.path.join(base_dir, key)
    if matching_files and not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    for file_name in matching_files:
        shutil.move(file_name, os.path.join(target_dir, os.path.basename(file_name)))

Related

Creating a multidimensional list of similarly named files with different extensions

I have a directory of files that follows this file naming pattern:
alice_01.mov
alice_01.mp4
alice_02.mp4
bob_01.avi
My goal is to find all files at a given path and create a "multidimensional" list of them where each sublist is the unique name of the file (without extension) and then a list of extensions, like so:
resulting_list = [
['alice_01', ['mov','mp4']],
['alice_02', ['mp4']],
['bob_01', ['avi']]
]
I have gotten this far:
import os
path = "user_files/"
def user_files(path):
    """Return a new list holding the names of the entries found in ``path``."""
    return list(os.listdir(path))
# Builds file_array as a list of [name, [extensions]] entries.
file_array = []
for file in user_files(path):
# Only the text before the first dot and the text between the first and
# second dot are used; a name without any dot has no index 1.
file_name = file.split(".")[0]
file_ext = file.split(".")[1]
if file_name not in (sublist[0] for sublist in file_array):
file_array.append([file_name,[file_ext]])
else:
# file_array holds [name, [exts]] lists, so index(file_name) looks for an
# element equal to the bare string and finds none. The append also targets
# the outer entry rather than its extension list.
file_array[file_array.index(file_name)].append([file_name,[file_ext]])
print(file_array)
My problem is in the else condition but I'm struggling to get it right.
Any help is appreciated.
Here's how you can do it using a dict to store the results:
filenames = [
    "alice_01.mov",
    "alice_01.mp4",
    "alice_02.mp4",
    "bob_01.avi",
]

# Map each base name to the extensions seen for it, in input order.
file_dict = {}
for entry in filenames:
    pieces = entry.split(".")
    # Exactly two leading pieces are required: base name and extension.
    stem, suffix = pieces[:2]
    if stem not in file_dict:
        file_dict[stem] = []
    file_dict[stem].append(suffix)
print(file_dict)
Result:
{'alice_01': ['mov', 'mp4'], 'alice_02': ['mp4'], 'bob_01': ['avi']}
UPDATE: The code above doesn't handle special cases, so here's a slightly more robust version.
from pprint import pprint
filenames = [
    "alice_01.mov",
    "alice_01.mp4",
    "alice_02.mp4",
    "bob_01.avi",
    "john_007.json.xz",
    "john_007.json.txt.xz",
    "john_007.json.txt.zip",
    "tom_and_jerry",
    "tom_and_jerry.dat",
]

# Group extensions by base name, where the extension is whatever follows the
# last dot and a name without any dot gets an empty extension.
file_dict = {}
for entry in filenames:
    stem, dot, suffix = entry.rpartition(".")
    if not dot:
        # No dot at all: rpartition puts the whole name in the last slot.
        stem, suffix = entry, ""
    file_dict.setdefault(stem, []).append(suffix)
pprint(file_dict)
Result:
{'alice_01': ['mov', 'mp4'],
'alice_02': ['mp4'],
'bob_01': ['avi'],
'john_007.json': ['xz'],
'john_007.json.txt': ['xz', 'zip'],
'tom_and_jerry': ['', 'dat']}

How to zip files that ends with certain extension

I want to get all files in a directory (I reach it after several nested for loops - hence fourth.path) that end with .npy or .csv, and then zip those files.
My code runs, but it puts only one file in the zip file. What am I doing wrong?
I tried to change my indents, but no zip file is being created
import json
import os
import zipfile
import zlib
directory = os.path.join(os.getcwd(), 'recs')
radarfolder = 'RadarIfxAvian'
file = os.listdir(directory)
def r(p, name):
    """Join ``name`` onto ``p`` and return the path with every '/' turned into a backslash."""
    joined = os.path.join(p, name)
    return "\\".join(joined.split("/"))
# Walks three directory levels below `directory`; inside each third-level
# folder it looks for a sub-folder whose path contains `radarfolder` and tries
# to zip the .npy / .csv files found there.
# NOTE(review): the indentation of this listing was lost, so the exact nesting
# of the statements below has to be inferred.
for first in os.scandir(directory):
if first.is_dir():
for second in os.scandir(first.path):
if second.is_dir():
for third in os.scandir(second.path):
if third.is_dir():
radar_folder_name = ''
list_files = ()
for fourth in os.scandir(third.path):
if fourth.is_dir():
if radarfolder in fourth.path:
radar_folder_name = fourth.path
print(radar_folder_name)
list_files = ()
for file in os.listdir(fourth.path):
if file.endswith(".npy") | file.endswith(".csv"):
# Parentheses alone do not build a tuple: this rebinds list_files to the
# file-name string, replacing the previous value on every match.
list_files = (file)
print(list_files)
# Mode 'w' starts a fresh radar.zip every time the archive is opened.
with zipfile.ZipFile(radar_folder_name +'\\' +'radar.zip', 'w', compression=zipfile.ZIP_DEFLATED ) as zipMe:
zipMe.write(radar_folder_name +'\\' +list_files)
# Redundant inside the `with` block, which already closes the archive.
zipMe.close()
I tried to change my indents either resulting in error: TypeError: can only concatenate str (not "tuple") to str or no zip file being created
As I said in my second comment, your problem comes from the 'w' argument in your zipping statement. It causes the zip to be overwritten every time it's opened, which you do for each file you zip in. You can fix this 2 ways (at least):
Replace 'w' with 'a'; this way the files will be appended to your zip (with the side effect that, if you do this several times, files will be added more than once).
Keep the 'w', but only open the zip once, having listed all the files you want to zip before. See my code below.
I've taken the liberty to rewrite the part of your code where you look for the 'RadarIfxAvian' folder, since embedded for are clumsy (and if your folder structure changes, they might not work), replacing it with a multi-purpose recursive function.
Note that the folder structure will be included in the .zip; if you want to zip only the files themselves, consider doing os.chdir(radar_folder_name) before zipping the files.
# This function recursively looks for the 'filename' file or folder
# under 'start_path' and returns the full path, or an empty string if not found.
def find_file(start_path, filename):
    """Return the '/'-joined path of ``filename`` under ``start_path``, or ''.

    A direct child of ``start_path`` is preferred over a deeper match.
    Sub-folders are then searched depth-first in directory-listing order.
    """
    # Read the directory once and release the scandir handle immediately,
    # instead of listing it twice and leaving the iterator open on return.
    with os.scandir(start_path) as entries:
        children = list(entries)

    if any(child.name == filename for child in children):
        return start_path + '/' + filename

    for child in children:
        if not child.is_dir():
            continue
        deep_path = find_file(start_path + '/' + child.name, filename)
        if deep_path:
            return deep_path
    return ''
directory = os.path.join(os.getcwd(), 'recs')
radarfolder = 'RadarIfxAvian'
radar_folder_name = find_file(directory, radarfolder)
if not radar_folder_name:
    # find_file returns '' when nothing matched; stop with a clear message
    # instead of failing later on os.listdir('').
    raise SystemExit("Folder '%s' not found under %s" % (radarfolder, directory))
print(radar_folder_name)
# List every file to archive first, so the zip is opened in 'w' mode only once.
list_files = []
for file in os.listdir(radar_folder_name):
    if file.endswith((".npy", ".csv")):
        list_files.append(file)
with zipfile.ZipFile(radar_folder_name + '/' + 'radar.zip', 'w', compression=zipfile.ZIP_DEFLATED ) as zipMe:
    for file in list_files:
        # The full path is stored, so the folder structure ends up in the zip.
        zipMe.write(radar_folder_name + '/' + file)
If I understand your code correctly, you are looking for a folder "RadarIfxAvian" and want to place a .ZIP in that folder containing any .CSV or .NPY files in that directory. This should do the equivalent, using os.walk for the recursive search:
import os
import zipfile
# Search the tree under 'recs' for the 'RadarIfxAvian' folder and zip the
# .npy / .csv files it contains into radar.zip inside that folder.
for path, dirs, files in os.walk('recs'):
    if os.path.basename(path) == 'RadarIfxAvian':
        print(path)
        with zipfile.ZipFile(os.path.join(path, 'radar.zip'), 'w', zipfile.ZIP_DEFLATED) as archive:
            for file in files:
                if file.endswith((".npy", ".csv")):
                    print(file)
                    # os.walk yields bare names relative to `path`, not to the
                    # current directory, so join before reading the file.
                    # arcname stores the entry under its bare name.
                    archive.write(os.path.join(path, file), arcname=file)
        break  # stop search once the directory is found and processed
I adjusted my code with the following steps:
Put the if in a function
Write the zip by looping over each item in the list I appended to
import json
import os
import glob
import zipfile
import zlib
directory = os.path.join(os.getcwd(), 'recs')
radarfolder = 'RadarIfxAvian'
file = os.listdir(directory)
list_files = []
def r(p, name):
    """Join ``name`` onto ``p`` and return the path with every '/' turned into a backslash."""
    joined = os.path.join(p, name)
    return "\\".join(joined.split("/"))
def tozip(path, file):
    """Append ``path + '\\' + file`` to the module-level ``list_files`` when
    ``file`` ends with .npy or .csv, and return ``list_files`` either way."""
    wanted = file.endswith((".npy", ".csv"))
    if wanted:
        list_files.append('\\'.join((path, file)))
    return list_files
# Walks three directory levels below `directory`; inside each third-level
# folder it looks for a sub-folder whose path contains `radarfolder`, collects
# its .npy / .csv files through tozip() and writes them to radar.zip.
# NOTE(review): the indentation of this listing was lost, so the exact nesting
# of the statements below has to be inferred.
for first in os.scandir(directory):
if first.is_dir():
for second in os.scandir(first.path):
if second.is_dir():
for third in os.scandir(second.path):
if third.is_dir():
radar_folder_name = ''
filestozip = []
# Empties the shared module-level list before each third-level folder.
list_files.clear()
for fourth in os.scandir(third.path):
if fourth.is_dir():
if radarfolder in fourth.path:
radar_folder_name = fourth.path
for file in os.listdir(fourth.path):
# tozip returns the shared list_files object, so filestozip is that same list.
filestozip = tozip(radar_folder_name,file)
print(filestozip)
# NOTE(review): the archive is opened without `with` and no close() call is
# visible in this snippet - confirm it is closed so the zip gets finalized.
ZipFile = zipfile.ZipFile(r(radar_folder_name,"radar.zip"), "w")
for a in filestozip:
ZipFile.write(a, compress_type= zipfile.ZIP_DEFLATED)
print(radar_folder_name + "added to zip")

delete all files except .parquet files from all folders in windows file system using python

I want to iterate over all the folders in a Windows filesystem with Python and delete every file except the .parquet files, i.e. remove all other files ending with .crc, .bak, etc.
The problem is i have folders like files1,files2,files3 ... files100 folders and i have to remove all other .bak,.crc etc., files from all the folders and just keep .parquet files,can anyone help me on this please
i tried this
# Removes every entry of `mydir` whose name does not end with ".parquet".
# Only the direct entries of this one folder are visited; nothing here
# descends into sub-folders.
mydir='c/users/name/files'
for f in os.listdir(mydir):
if f.endswith(".parquet"):
continue
# os.listdir also returns sub-folder names, which os.remove cannot delete.
os.remove(os.path.join(mydir, f))
import os
def getListOfFiles(dirName):
    """Return the full paths of all non-directory entries below ``dirName``.

    Entries are visited in ``os.listdir`` order, and the contents of a
    sub-directory are inserted at the position where it was encountered.
    """
    collected = []
    for entry in os.listdir(dirName):
        candidate = os.path.join(dirName, entry)
        if not os.path.isdir(candidate):
            collected.append(candidate)
            continue
        # Directories contribute their own files, gathered recursively.
        collected.extend(getListOfFiles(candidate))
    return collected
## select only files in all dirr
onlyfiles = getListOfFiles("C:/Users")
to_keep = ["parquet"] #list of format to keep
to_delete = []
for name in onlyfiles:
    # Take the extension from the file name only, after its last dot. Scanning
    # the whole path by hand picked up dots in folder names and reused the
    # previous file's dot position for names without any dot.
    _, dot, extension = os.path.basename(name).rpartition(".")
    # Names without a dot have no extension and are never kept. The comparison
    # ignores case so that e.g. ".PARQUET" files are preserved as well.
    if not dot or extension.lower() not in to_keep:
        to_delete.append(name)
for to_del in to_delete:
    os.remove(to_del)
Don't forget to modify this line :
onlyfiles = getListOfFiles("C:/Users")
Be careful with this code: you could delete a lot of files by mistake.

How to create a python list with the number of file in each sub directory of a directory

I have a main directory(root) which countain 6 sub directory.
I would like to count the number of files present in each sub directory and add all to a simple python list.
For this result : mylist = [497643, 5976, 3698, 12, 456, 745]
I'm blocked on that code:
import os, sys
# NOTE: the name `list` shadows the built-in list type for the rest of the script.
list = []
# Directory whose entries are listed
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
print (file)
# Intended to fill a list with the number of elements of each sub directory
for sub_dir in dirs:
# len(sub_dir) is the length of the entry's name, not a file count.
# dirs.append() returns None, so `list` is rebound to None, and dirs grows
# while it is being iterated.
list = dirs.append(len(sub_dir))
My attempt at filling the list doesn't work, and this is honestly the best I can do...
Finding a way to iterate over the sub-directories of a main directory and fill a list with a function applied to each sub-directory would greatly speed up my current data science project!
Thanks for your help
Abel
You can use os.path.isfile and os.path.isdir
# One count per sub directory of `path`. os.listdir yields bare names, so each
# entry is joined with its folder before the isfile test, and the True results
# are summed so that only regular files are counted.
res = [
    sum(
        os.path.isfile(os.path.join(path, name, entry))
        for entry in os.listdir(os.path.join(path, name))
    )
    for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
]
print(res)
Using the for loop
# One count per sub directory of `path`, counting regular files only.
res = []
for name in os.listdir(path):
    dir_path = os.path.join(path, name)
    if os.path.isdir(dir_path):
        # Join each bare entry name with dir_path before testing it, and sum
        # the True results; len() of the mapped list would count every entry.
        res.append(sum(
            os.path.isfile(os.path.join(dir_path, entry))
            for entry in os.listdir(dir_path)
        ))
You need to use os.listdir on each subdirectory. The current code simply takes the length of a filepath.
import os, sys
list = []
# Directory whose sub directories are counted
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
    print (file)
#fill a list with each sub directory number of elements
for sub_dir in dirs:
    # os.listdir returns bare names, so rebuild the full path before using it.
    sub_dir_path = os.path.join(path, sub_dir)
    # Plain files in the root folder cannot be listed; skip them.
    if os.path.isdir(sub_dir_path):
        temp = os.listdir(sub_dir_path)
        # Append to the result list; appending to `dirs` would both lose the
        # count (append returns None) and alter the list being iterated.
        list.append(len(temp))
Adding this line to the code will list out the subdirectory
You were almost there:
import os, sys
list = []
# Directory whose sub directories are counted
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
for sub_dir in dirs:
    # os.listdir returns bare names. Testing them without `path` checks the
    # current working directory instead, so build the full path once and use
    # it for both the isdir test and the listing.
    sub_dir_path = os.path.join(path, sub_dir)
    if os.path.isdir(sub_dir_path):
        list.append(len(os.listdir(sub_dir_path)))
print(list)
As an alternative, you can also utilize glob module for this and other related tasks.
I have created a test directory containing 3 subdirectories l,m and k containing 3 test files each.
import os, glob
list = []
path = "test" # you can leave this "." if you want files in the current directory
# For every directory os.walk reports, at any depth, record how many entries
# the pattern "<dir>/*" matches.
for root, dirs, files in os.walk(path, topdown=True):
    list.extend(len(glob.glob('/'.join((root, name, '*')))) for name in dirs)
print(list)
Output :
[3, 3, 3]

Organizing and copying files to new folders

I'm trying to organize some data before processing it.
What I have is a folder of raw tiff files (they're raster bands from a drone sensor).
I want to move these files into new, individual folders. e.g., IMG_001_1, IMG_001_2, IMG_001_3, IMG_001_4 and IMG_001_5 are all moved into a new folder titled IMG_001. I am ok with changing the naming structure of the files in order to make the code simpler.
An additional issue is that there are a few images missing from the folder. The current files are IMG0016 - IMG0054 (no IMG0055), IMG0056 - IMG0086 (no IMG0087), and IMG0087 - IMG0161. This is why I think it would be simpler to just rename the new image folders from 1-143.
My main problem is actually moving the files into the new folders - creating the folders is fairly simple.
Played around a little and this script came out, which should do what you want:
import os
import shutil
import re
# Raw strings keep every backslash literal. In a normal literal "\U" starts a
# unicode escape on Python 3 and makes the file fail to compile. A raw string
# cannot end in a backslash, so the trailing separator is added separately.
UNORG = r"C:\Users\joshuarb\Desktop\Unorganized_Images" + "\\"
ORG = r"C:\Users\joshuarb\Desktop\Organized_Images" + "\\"
def main():
    """Create ORG/IMG_0000 .. ORG/IMG_0142 and move the matching files from UNORG into them."""
    file_names = [os.path.join(UNORG, i) for i in get_files_of(UNORG)]
    for count in range(0, 143):
        current_dir = "{}IMG_{:04d}".format(ORG, count)
        # Re-running the script must not fail on folders created earlier.
        if not os.path.isdir(current_dir):
            os.makedirs(current_dir)
        move_files = get_files_to_move(file_names, count)
        # Function-call form prints the same text on Python 2 and Python 3.
        print(move_files)
        for i in move_files:
            shutil.move(i, os.path.join(current_dir, os.path.basename(i)))
def get_files_to_move(file_names, count):
    """Return the entries of ``file_names`` that contain ``IMG<count>_``.

    ``count`` is inserted into the pattern as-is (no zero padding), and the
    pattern is applied to the whole string, folder part included.
    """
    pattern = re.compile('.*IMG{}_.*'.format(count))
    selected = []
    for candidate in file_names:
        if pattern.match(candidate) is not None:
            selected.append(candidate)
    return selected
def get_files_of(mypath):
    """Return the names of the files directly inside ``mypath`` (no recursion).

    An empty list is returned when ``mypath`` cannot be walked, for example
    because it does not exist.
    """
    # next(iterator, default) works on Python 2.6+ and Python 3, whereas the
    # generator's .next() method exists only on Python 2. The default covers
    # the case where os.walk yields nothing at all.
    (dirpath, dirnames, filenames) = next(os.walk(mypath), (mypath, [], []))
    return filenames
if __name__ == '__main__':
main()
As you see, the code is not commented. But feel free to ask if something is unclear;)
Problem solved!
import os
import shutil
# Raw strings keep the backslashes literal; "\U" in a normal literal is a
# unicode escape on Python 3 and stops the file from compiling.
srcpath = r"C:\Users\joshuarb\Desktop\Python_Test\UnorganizedImages"
srcfiles = os.listdir(srcpath)
destpath = r"C:\Users\joshuarb\Desktop\Python_Test\OrganizedImages"

# the first eight characters of each filename name its destination folder;
# going through a set filters out duplicates
destdirs = list(set([filename[0:8] for filename in srcfiles]))


def create(dirname, destpath):
    """Make sure ``destpath/dirname`` exists and return its full path."""
    full_path = os.path.join(destpath, dirname)
    # Tolerate folders left over from an earlier run.
    if not os.path.isdir(full_path):
        os.mkdir(full_path)
    return full_path


def move(filename, dirpath):
    """Move ``filename`` from the source folder into ``dirpath``."""
    shutil.move(os.path.join(srcpath, filename), dirpath)


# create destination directories and store their names along with full paths
targets = [
    (folder, create(folder, destpath)) for folder in destdirs
]
for dirname, full_path in targets:
    for filename in srcfiles:
        if dirname == filename[0:8]:
            move(filename, full_path)

Categories