Delete directories except one sub-directory in Python - python

I have some directories which I want to delete, but one of the directories has a sub-directory that I want to keep.
Example:
Files
pictures
cat.png
icon.png
Music
song.mp3
Movies
First.mp4
I want to delete everything (directories and sub-directories) except the sub-directory pictures for example.
Right now I have this:
def Destroy_Path(path):
    """Recursively delete the directory tree rooted at ``path``.

    Errors raised during removal (including a missing ``path``) are
    ignored because ``ignore_errors=True`` is passed to ``shutil.rmtree``.
    """
    shutil.rmtree(
        path,
        ignore_errors=True,
    )

Here is a solution. UNTESTED!
Note the use of os.walk()'s ability to change dirnames in-place to tell it not to recurse into a sub-directory, and the avoidance of using topdown=False which would break this feature:
When topdown is True, the caller can modify the dirnames list in-place
(perhaps using del or slice assignment), and walk() will only recurse
into the subdirectories whose names remain in dirnames; this can be
used to prune the search, [...]
Modifying dirnames when topdown is False has no effect on the behavior of the walk, because in bottom-up mode the directories in dirnames are generated before dirpath itself is generated.
See the docs for more information.
Also notable is the use of os.path.samefile() for path comparisons. This is available on Windows since version 3.2.
THIS IS UNTESTED!
USE AT YOUR OWN RISK!!
SERIOUSLY, BE CAREFUL!!!
import os
def gen_dir_paths_except(base_path, exceptions):
    """Yield ``base_path`` and every directory below it, top-down.

    Any directory that is the same file as an entry of ``exceptions`` is
    neither yielded nor descended into. Entries of ``exceptions`` that do
    not exist are ignored. Nothing is yielded when ``base_path`` is not a
    directory or is itself an exception.
    """
    # Mirror os.walk(): a non-directory argument produces nothing.
    if not os.path.isdir(base_path):
        return

    # os.path.samefile() needs both paths to exist, so drop missing entries.
    existing = [entry for entry in exceptions if os.path.exists(entry)]

    def is_excepted(candidate):
        for keep in existing:
            if os.path.samefile(candidate, keep):
                return True
        return False

    # The base directory itself may be protected.
    if is_excepted(base_path):
        return
    yield base_path

    for parent, subdirs, _files in os.walk(base_path):
        pruned = []
        for name in subdirs:
            full_path = os.path.join(parent, name)
            if is_excepted(full_path):
                pruned.append(name)
            else:
                yield full_path
        # Editing the list in place tells os.walk() (top-down mode) not to
        # recurse into the protected directories.
        for name in pruned:
            subdirs.remove(name)
def rmtree_except(path, exceptions):
    """Delete the tree rooted at ``path`` while keeping ``exceptions``.

    Every file below ``path`` is removed unless it is the same file as an
    entry of ``exceptions``; directories are removed once they are empty.
    Directories that still hold a kept entry are left in place.

    Args:
        path: Root directory to clean out.
        exceptions: Iterable of paths to keep. Non-existent entries are
            ignored.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        NotADirectoryError: ``path`` exists but is not a directory.
        OSError: A file or link could not be removed.
    """
    # os.walk() silently yields nothing for a non-directory, so check here.
    # Both exceptions are OSError subclasses, so callers catching OSError
    # keep working.
    if not os.path.isdir(path):
        if not os.path.exists(path):
            raise FileNotFoundError("No such directory: " + path)
        raise NotADirectoryError("Not a directory: " + path)

    # keep only existing paths in exceptions, so that os.path.samefile() works
    exceptions = [x for x in exceptions if os.path.exists(x)]
    dirs = list(gen_dir_paths_except(path, exceptions))

    # The generator is top-down; walking it backwards handles children
    # before their parents.
    for dir_path in reversed(dirs):
        # os.walk() reports symlinks to directories as directories. Remove
        # the link itself and never touch the files of its target, which
        # may live outside the tree being deleted.
        if dir_path != path and os.path.islink(dir_path):
            os.unlink(dir_path)
            continue

        for entry in os.listdir(dir_path):
            entry_path = os.path.join(dir_path, entry)
            if os.path.isdir(entry_path):
                continue  # sub-directories are handled in their own iteration
            # A dangling symlink cannot be "the same file" as any existing
            # exception, and os.path.samefile() would raise on it.
            dangling = os.path.islink(entry_path) and not os.path.exists(entry_path)
            if dangling or not any(
                os.path.samefile(entry_path, x) for x in exceptions
            ):
                os.remove(entry_path)

        try:
            os.rmdir(dir_path)
        except OSError:
            # Directory not empty (it holds a kept entry): leave it.
            pass

Related

Check multiple files exist for each folders

I am looking for a way to print out the files that do not exist in the directories.
So far I could do
# Glob-style patterns for the three workbooks expected in a folder.
QA_files_pattern = '*QA.xlsx'
EP_files_pattern = '*EP.xlsx'
AD_files_pattern = '*AD.xlsx'
filelist = [QA_files_pattern,EP_files_pattern,AD_files_pattern]
# Start from the absolute path of the current working directory.
path = os.path.abspath(os.getcwd())
# os.walk() visits the start directory and every directory below it, so the
# check below runs for intermediate folders as well as for the deepest ones.
for (path, dir, files) in os.walk(path): # Get all files in current file's path
for a_file in filelist:
# A non-empty result means at least one entry of this directory matches.
if fnmatch.filter(os.listdir(path), a_file):
print(fnmatch.filter(os.listdir(path), a_file))
else:
# NOTE(review): path and pattern are concatenated without a separator.
print("missing"+path+a_file)
The problem with this approach is that it checks whether the file patterns exist not only in the folders I want to look at, but also in every directory it passes through to reach those folders.
Will there be a way to make it search just the end path?
EDIT: I cannot specify how many subfolders there will be, but I am sure the files are only contained at the end folders and the common name for end folder is 'QS'
You can set your path so that it is explicitly on the file path you desire.
Right now you are setting the path on the current directory, which I believe is the root.
path = os.path.abspath(os.getcwd())
You could make the path explicit to limit the search:
path = os.path.abspath(os.getcwd())
extended_path= os.path.join(path, "specific_directory", "subdirectory", "etc")
And then sub in the extended_path
# Visit extended_path and every directory below it. A separate loop variable
# is used so the starting path is not overwritten, and the builtin ``dir``
# is not shadowed.
for current_dir, _subdirs, files in os.walk(extended_path):
    for a_file in filelist:
        # os.walk() already supplies the file names of this directory, so
        # there is no need to list the directory again for every pattern.
        matches = fnmatch.filter(files, a_file)
        if matches:
            print(matches)
        else:
            print("missing"+current_dir+a_file)
You can use glob recursively for something like:
from glob import glob
from os import path
QA = "*QA.xlsx"
EP = "*EP.xlsx"
AD = "*AD.xlsx"
base_path = "/base/path/**/*"  # change /base/path to the dir you want to check

# Label/pattern pairs, in the order they are reported.
_required = (("QA", QA), ("EP", EP), ("AD", AD))

# Walk everything below base_path and examine directories only.
for d in glob(base_path, recursive=True):
    if not path.isdir(d):
        continue
    for k, pattern in _required:
        # An empty glob result means the directory has no file of this kind.
        if not glob(f"{d}/{pattern}"):
            print(f"Dir '{d}' Missing {k}")
Ok so I was able to solve for my own question, which was much simpler way than I expected
QA_files_pattern = '*QA.xlsx'
EP_files_pattern = '*EP.xlsx'
AD_files_pattern = '*AD.xlsx'
filelist = [QA_files_pattern, EP_files_pattern, AD_files_pattern]

path = os.path.abspath(os.getcwd())
for (path, dir, files) in os.walk(path):
    # Only directories whose path contains "QS" are checked.
    if "QS" not in path:
        continue
    for a_file in filelist:
        found = fnmatch.filter(os.listdir(path), a_file)
        if found:
            print(found)
        else:
            print("missing"+path+a_file)

Copying specific files to a new folder, while maintaining the original subdirectory tree

I have a large directory with many subdirectories that I am trying to sort, I am trying to copy specific file types to a new folder, but I want to maintain the original subdirectories.
def copyFile(src, dest):
    """Copy ``src`` to ``dest``, reporting failures instead of raising.

    Args:
        src: Path of the file to copy.
        dest: Destination file or directory, as accepted by ``shutil.copy``.

    A ``shutil.Error`` or ``IOError`` raised by the copy is printed as
    ``Error: ...`` and swallowed; nothing is returned.
    """
    try:
        shutil.copy(src, dest)
    except shutil.Error as e:
        print('Error: %s' % e)
    except IOError as e:
        # The original referenced an undefined name ``s`` here, which turned
        # every I/O failure into a NameError.
        print('Error: %s' % e.strerror)
for root, directories, files in os.walk(directory):
    # Record every sub-directory and note the empty ones.
    for directoryname in directories:
        dirpath = os.path.join(root, directoryname)
        dir_paths.append(dirpath)
        dir_names.append(directoryname)
        if os.listdir(dirpath):
            continue
        print("Empty")
        EmptyDirs.append(directoryname)  # Add directory name to empty directory list
        EmptyDirPath.append(dirpath)

    # Record every file and copy the SolidWorks assemblies/parts to dest.
    for filename in files:
        filepath = os.path.join(root, filename)
        file_paths.append(filepath)
        file_names.append(filename)
        if filename.lower().endswith((".sldasm", ".sldprt")):
            print(filename.encode('utf8'))
            SolidModels.append(filename)
            copyFile(filepath, dest)
This is the code I am using now, but it just copies the files without copying the subdirectories they were originally in, so they are completely unorganized in the new folder.
This is the new code using copytree, however now the specific files will not copy, only the subdirectories do.
# Recursively mirror the directory ``src`` into a new directory ``dst``.
# ``ignore``, when given, is called with (src, names) and returns the names
# to skip; ``symlinks`` controls whether symbolic links are recreated as links.
def copytree(src, dst, symlinks=False, ignore=None):
names = os.listdir(src)
if ignore is not None:
ignored_names = ignore(src, names)
else:
ignored_names = set()
# Create the destination directory (and any missing parents).
os.makedirs(dst)
errors = []
for name in names:
if name in ignored_names:
continue
srcname = os.path.join(src, name)
dstname = os.path.join(dst, name)
try:
if symlinks and os.path.islink(srcname):
linkto = os.readlink(srcname)
os.symlink(linkto, dstname)
elif os.path.isdir(srcname):
# Sub-directories are copied by recursion.
copytree(srcname, dstname, symlinks, ignore)
else:
# NOTE(review): ``src`` is the source directory path and ``is`` tests
# object identity against a literal, so these two conditions are not
# expected to be true; a pattern test on ``name`` was presumably intended.
if src is "*.sldasm":
copy2(srcname, dstname)
elif src is "*.sldprt":
copy2(srcname, dstname)
except (IOError, os.error) as why:
# NOTE(review): errors are collected but, in the lines shown, never
# raised or returned.
errors.append((srcname, dstname, str(why)))
You can do what you want with the built-in shutil.copytree() function by using (abusing?) its optional ignore keyword argument. The tricky part is that, if given, it must be a callable that returns what, in each directory, should not be copied, rather than what should be.
However, it is possible to write a factory function similar to shutil.ignore_patterns() that creates a function that does what's needed, and use that as the ignore keyword argument's value.
The function returned first determines what files to keep via the fnmatch.filter() function, then removes them from the list of everything which is in the given directory, unless they're a sub-directory name, in which case they're left in for later [recursive] processing. (This is what makes it copy the whole tree and what was probably wrong with your attempt to write your own copytree() function).
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Works in Python 2.7 & 3.x
import fnmatch
from os.path import isdir, join
def include_patterns(*patterns):
    """Build a shutil.copytree() ``ignore`` callable that keeps matching files.

    This is the inverse of shutil.ignore_patterns(): the returned callable
    tells copytree() to ignore every name that matches none of the
    glob-style ``patterns``, except directory names, which are never ignored
    so that the whole tree is still traversed.

    Example usage:
        copytree(src_directory, dst_directory,
                 ignore=include_patterns('*.sldasm', '*.sldprt'))
    """
    def _ignore_patterns(path, all_names):
        # Names matching at least one pattern must be copied.
        wanted = set()
        for pattern in patterns:
            wanted.update(fnmatch.filter(all_names, pattern))
        # Everything else is ignored, unless it is a directory that
        # copytree() still has to recurse into.
        return {
            name
            for name in all_names
            if name not in wanted and not isdir(join(path, name))
        }

    return _ignore_patterns
if __name__ == '__main__':
    from shutil import copytree, rmtree
    import os

    src = r'C:\vols\Files\PythonLib\Stack Overflow'
    dst = r'C:\vols\Temp\temp\test'

    # Make sure the destination folder does not exist.
    if os.path.isdir(dst):
        print('removing existing directory "{}"'.format(dst))
        rmtree(dst, ignore_errors=False)

    # Copy the tree, keeping only the image files.
    copytree(src, dst, ignore=include_patterns('*.png', '*.gif'))
    print('done')

Moving files and creating directories if certain file type in python

This is probably a simple question, but I'm brand new to python and programming in general.
I'm working on a simple program to copy/move .mp3 files from on location to another while mirroring the directory structure of the source location. What I have so far works, however it also creates new folders in the destination location even if the source folder contained no mp3 files. I only want to create the new directories if the source contains .mp3s, otherwise it could lead to a bunch of empty folders in the destination.
Here is what I have so far:
import os
import shutil #Used for copying files
##CONFIG
source_dir = "C:\Users\username\Desktop\iTunes\\" #set the root folder that you want to scan and move files from. This script will scan recursively.
destPath = "C:\Users\username\Desktop\converted From iTunes" #set the destination root that you want to move files to. Any non-existing sub directories will be created.
ext = ".mp3" #set the type of file you want to search for.
count = 0 #initialize counter variable to count number of files moved
##
##FIND FILES
# Walk the source tree; for each directory, mirror it under destPath and move
# the files whose name ends with ext.
for dirName, subdirList, fileList in os.walk(source_dir):
#set the path for the destination folder(s)
dest = destPath + dirName.replace(source_dir, '\\')
#if the source directory doesn't exist in the destination folder
#then create a new folder
# NOTE(review): the folder is created before any file is examined, so it is
# created even when this source directory holds no matching file.
if not os.path.isdir(dest):
os.mkdir(dest)
print('Directory created at: ' + dest)
for fname in fileList:
if fname.endswith(ext) :
#determine source & new file locations
oldLoc = dirName + '\\' + fname
newLoc = dest + '\\' + fname
if os.path.isfile(newLoc): # check to see if the file already exists. If it does print out a message saying so.
# NOTE(review): newLoc already ends with fname, so the name is printed twice.
print ('file "' + newLoc + fname + '" already exists')
if not os.path.isfile(newLoc): #if the file doesnt exist then copy it and print out confirmation that is was copied/moved
try:
# shutil.move() relocates the file; the message below says "copied".
shutil.move(oldLoc, newLoc)
print('File ' + fname + ' copied.')
count = count + 1
except IOError:
print('There was an error copying the file: "' + fname + '"')
print 'error'
# Python 2 print statements: report the total once the walk is over.
print "\n"
print str(count) + " files were moved."
print "\n"
so if the folder structure is something like:
root->
band 1->
album name->
song.m4a,
song2.m4a
right now it will create all those folders in the destination directory, even though there are no .mp3s to copy.....
Any help is appreciated!
I think I would create my own wrapper around copy for this sort of thing:
def fcopy(src, dest):
    """Copy the file ``src`` to ``dest``, creating missing parent directories.

    Args:
        src: Path of the file to copy.
        dest: Destination file path, absolute or relative. Its parent
            directory is created first when it does not exist.

    Raises:
        OSError: The parent directory could not be created, or the copy
            itself failed.
    """
    dest_dir = os.path.dirname(dest)
    # An empty dirname means "current directory": nothing to create.
    if dest_dir and not os.path.isdir(dest_dir):
        try:
            os.makedirs(dest_dir)
        except OSError:
            # Tolerate only the benign case where the directory appeared in
            # the meantime; permission problems and the like must surface
            # here rather than being silently swallowed.
            if not os.path.isdir(dest_dir):
                raise
    shutil.copy(src, dest)
Now you can just walk the tree calling this function on any .mp3 file.
The simplest thing to do I can think of for your existing code would be to just make it skip over any folders that don't have any .mp3 files in them. This can easily be done by adding the following items and if statement to the top of your loop. The itertools.ifilter() and fnmatch.fnmatch() functions can be used together to simplify checking for files with the proper extension.
from itertools import ifilter
from fnmatch import fnmatch
ext = '.mp3'
fnPattern = '*'+ext
# Python 2 fragment: skip every directory that holds no file matching fnPattern.
for dirName, subdirList, fileList in os.walk(source_dir):
# any() consumes the lazy ifilter() and stops at the first matching name.
if not any(ifilter(lambda fname: fnmatch(fname, fnPattern), fileList)):
print ' skipping "{}"'.format(dirName)
continue
# Placeholder for the rest of the original loop body.
...
You will also have to change the os.mkdir(dest) to os.makedirs(dest) in the code further down to ensure that any subdirectories skipped by earlier iterations get created when there's a need to copy files to a corresponding subbranch of the destination directory.
You could optimize things a bit by creating and saving a possibly empty iterator of matching files that have the extension, and then use it again later to to determine what files to copy:
from itertools import ifilter
from fnmatch import fnmatch
ext = '.mp3'
fnPattern = '*'+ext
for dirName, subdirList, fileList in os.walk(source_dir):
    # Collect the files in this directory that have the desired extension.
    # A real list is needed: an ifilter()/filter() iterator object is always
    # truthy, so testing it with "if not matches" would never skip anything.
    matches = [fname for fname in fileList if fnmatch(fname, fnPattern)]
    # skip subdirectory if it does not contain any files of interest
    if not matches:
        continue
    ...
    # ... create destination directory with os.makedirs()
    ...
    # copy each file to destination directory
    for fname in matches:
        ...  # copy file
Would shutil.copytree not do what you want in fewer lines?

Excluding all but a single subdirectory from a file search

I have a directory structure that resembles the following:
Dir1
Dir2
Dir3
Dir4
L SubDir4.1
L SubDir4.2
L SubDir4.3
I want to generate a list of files (with full paths) that include all the contents of Dirs1-3, but only SubDir4.2 inside Dir4. The code I have so far is
import fnmatch
import os
# Walk the current directory and print every file path (Python 2 syntax).
# NOTE(review): both ``for`` statements below lack their trailing colon.
for root, dirs, files in os.walk( '.' )
if 'Dir4' in dirs:
# NOTE(review): this is a substring test between two string literals; it is
# always False, so Dir4 is removed from the walk every time.
if not 'SubDir4.2' in 'Dir4':
dirs.remove( 'Dir4' )
for file in files
print os.path.join( root, file )
My problem is that the part where I attempt to exclude any file that does not have SubDir4.2 in its path is excluding everything in Dir4, including the things I would like to remain. How should I amend the above to do what I desire?
Update 1: I should add that there are a lot of directories below Dir4, so manually listing them in an excludes list isn't a practical option. I'd like to be able to specify SubDir4.2 as the only subdirectory within Dir4 to be read.
Update 2: For reason outside of my control, I only have access to Python version 2.4.3.
There are a few typos in your snippet. I propose this:
import os
def any_p(iterable):
    """Return True if any element of ``iterable`` is truthy, else False.

    Stand-in for the builtin any() on Python versions older than 2.5.
    Evaluation stops at the first truthy element.
    """
    for item in iterable:
        if item:
            break
    else:
        # The loop ran to completion: nothing truthy was found.
        return False
    return True
include_dirs = ['Dir4/SubDir4.2', 'Dir1/SubDir4.2', 'Dir3', 'Dir2'] # List all your included folder names in that
for root, dirs, files in os.walk( '.' ):
dirs[:] = [d for d in dirs if any_p(d in os.path.join(root, q_inc) for q_inc in include_dirs)]
for file in files:
print file
EDIT: According to comments, I have changed that so this is include list, instead of an exclude one.
EDIT2: Added a any_p (any() equivalent function for python version < 2.5)
EDIT3bis: if you have other subfolders with the same name 'SubDir4.2' in other folders, you can use the following to specify the location:
include_dirs = ['Dir4/SubDir4.2', 'Dir1/SubDir4.2']
Assuming you have a Dir1/SubDir4.2.
If there are a lot of those, then you may want to refine this approach with fnmatch, or probably a regex query.
I altered mstud's solution to give you what you are looking for:
import os;
for root, dirs, files in os.walk('.'):
# Split the root into its path parts
tmp = root.split(os.path.sep)
# If the lenth of the path is long enough to be your path AND
# The second to last part of the path is Dir4 AND
# The last part of the path is SubDir4.2 THEN
# Stop processing this pass.
if (len(tmp) > 2) and (tmp[-2] == 'Dir4') and (tmp[-1] != 'SubDir4.2'):
continue
# If we aren't in Dir4, print the file paths.
if tmp[-1] != 'Dir4':
for file in files:
print os.path.join(root, file)
In short, the first "if" skips the printing of any directory contents under Dir4 that aren't SubDir4.2. The second "if" skips the printing of the contents of the Dir4 directory.
for root, dirs, files in os.walk('.'):
tmp = root.split(os.path.sep)
if len(tmp)>2 and tmp[-2]=="Dir4" and tmp[-1]=="SubDir4.2":
continue
for file in files:
print os.path.join(root, file)

How to rename/replace a particular keyword with unicode character for all files and the associated folders?

I have the following files and subdirectories in a directory ('input_folder') and I would like to change the name of all the files with '.dat' extension and all the folders that have a particular keyword (e.g., ABC) with a Unicode character. A MWE is given below:
import os
import random
import errno
#--------------------------------------
# Create random folders and files
# tzot's forced directory create hack https://stackoverflow.com/a/600612/4576447
def mkdir_p(path):
    """Create ``path`` and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error. Any other failure,
    including ``path`` existing as a non-directory, re-raises the OSError.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        already_a_directory = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
# Build the test fixture: input_folder with randomly numbered sub-folders,
# each receiving two .dat files whose names contain "ABC".
if not os.path.isdir('./input_folder'):
os.makedirs('input_folder')
for i in range(10):
mkdir_p('./input_folder/folder_ABC_' + str(random.randint(100,999)))
for root, dirs, files in os.walk('./input_folder'):
for dir in dirs:
# NOTE(review): the file objects are never closed in the lines shown; the
# second assignment replaces the reference to the first.
result = open(os.path.join(root,dir) + '/ABC ' + str(random.randint(100,999)) + '.dat','w')
result = open(os.path.join(root,dir) + '/XYZ-ABC ' + str(random.randint(100,999)) + '.dat','w')
#--------------------------------------
# Main rename code
for root, dirs, files in os.walk('./input_folder'):
for file in files:
if file.endswith((".dat")):
# NOTE(review): ``file`` is a bare name from os.walk(); it is not joined
# with ``root`` before being passed to os.rename().
os.rename(file, file.replace('ABC', u'\u2714'.encode('utf-8')))
This MWE gives the following error:
os.rename(file, file.replace('ABC', u'\u2714'.encode('utf-8')))
WindowsError: [Error 2] The system cannot find the file specified
How to correctly rename all the files and folders that have ABC with a Unicode character in Python 2.7?
There are at least five issues:
When dealing with Unicode, use it everywhere. os.walk will return Unicode filenames if passed a Unicode path. from __future__ import unicode_literals will default strings to Unicode.
When opening files, close them. You'll run into problems later when renaming. result still exists and has a reference to the last file opened.
As mentioned in a comment, use os.path.join on the root and the file for both the before and after name.
Use os.walk with topdown=False. This will process the leaf nodes first, so the directory tree isn't corrupted (and keeping root and dirs valid) while traversing it.
Rename the files first, then the directories, again to not corrupt the directory tree while traversing it.
Result:
from __future__ import unicode_literals
# Skipping unchanged code...
for root, dirs, files in os.walk('./input_folder'):
    for dir in dirs:
        folder = os.path.join(root, dir)
        # Create two empty .dat files per folder; the with-block guarantees
        # each file is closed again before anything is renamed later.
        for prefix in ('/ABC ', '/XYZ-ABC '):
            with open(folder + prefix + str(random.randint(100,999)) + '.dat', 'w'):
                pass
#--------------------------------------
# Main rename code
# Bottom-up walk: leaves are handled before their parents, so the paths
# os.walk() hands out are still valid when they are used.
for root, dirs, files in os.walk('./input_folder', topdown=False):
    # Files first (only the .dat ones), then the directories.
    dat_files = [name for name in files if name.endswith(".dat")]
    for name in dat_files + list(dirs):
        # Build full paths for both the old and the new name.
        before = os.path.join(root, name)
        after = os.path.join(root, name.replace('ABC', '\u2714'))
        os.rename(before, after)

Categories