Check multiple files exist for each folders - python

I am looking for a way to print out the files that do not exist in my directories.
So far I could do
QA_files_pattern = '*QA.xlsx'
EP_files_pattern = '*EP.xlsx'
AD_files_pattern = '*AD.xlsx'
filelist = [QA_files_pattern,EP_files_pattern,AD_files_pattern]
path = os.path.abspath(os.getcwd())
for (path, dir, files) in os.walk(path): # Get all files in current file's path
for a_file in filelist:
if fnmatch.filter(os.listdir(path), a_file):
print(fnmatch.filter(os.listdir(path), a_file))
else:
print("missing"+path+a_file)
The problem with this approach is that it checks whether the file patterns exist not only in the folders I want to check, but also in every parent folder it passes through on the way to them.
Will there be a way to make it search just the end path?
EDIT: I cannot specify how many subfolders there will be, but I am sure the files are only contained at the end folders and the common name for end folder is 'QS'

You can set your path so that it is explicitly on the file path you desire.
Right now you are setting the path on the current directory, which I believe is the root.
path = os.path.abspath(os.getcwd())
You could make it an explicit path to limit the search:
path = os.path.abspath(os.getcwd())
extended_path= os.path.join(path, "specific_directory", "subdirectory", "etc")
And then sub in the extended_path
# Visit extended_path and every folder below it. The loop uses its own names
# so that it does not overwrite extended_path or shadow the builtin dir().
for (current_dir, subdirs, files) in os.walk(extended_path):
    for a_file in filelist:
        # os.walk already supplies this folder's file names, so match the
        # pattern against that list once instead of calling os.listdir()
        # twice per pattern.
        matches = fnmatch.filter(files, a_file)
        if matches:
            print(matches)
        else:
            print("missing"+current_dir+a_file)

You can use glob recursively for something like:
from glob import glob
from os import path

QA = "*QA.xlsx"
EP = "*EP.xlsx"
AD = "*AD.xlsx"
base_path = "/base/path/**/*"  # change /base/path to the dir you want to check

# Look at every entry found recursively below base_path; for each one that is
# a directory, report which of the three file patterns has no match in it.
for d in glob(base_path, recursive=True):
    if not path.isdir(d):
        continue  # plain files are not checked
    to_match = {
        label: glob(f"{d}/{pattern}")
        for label, pattern in (("QA", QA), ("EP", EP), ("AD", AD))
    }
    for k, v in to_match.items():
        if v:
            continue  # at least one file matched this pattern
        print(f"Dir '{d}' Missing {k}")

Ok so I was able to solve for my own question, which was much simpler way than I expected
import fnmatch
import os

QA_files_pattern = '*QA.xlsx'
EP_files_pattern = '*EP.xlsx'
AD_files_pattern = '*AD.xlsx'
filelist = [QA_files_pattern, EP_files_pattern, AD_files_pattern]
path = os.path.abspath(os.getcwd())

# Walk the tree below the current directory and check only the "QS" folders.
for (current_dir, subdirs, files) in os.walk(path):
    # Test the folder's own name, not the whole path: a substring test on the
    # full path would also match every folder below a QS folder and any path
    # that merely has "QS" somewhere in one of its parent names.
    if "QS" not in os.path.basename(current_dir):
        continue
    for a_file in filelist:
        # os.walk already lists this folder's files; no need for os.listdir().
        matches = fnmatch.filter(files, a_file)
        if matches:
            print(matches)
        else:
            print("missing"+current_dir+a_file)

Related

python: collect files with one extention from all sub-dir

I am trying to collect all files with all sub-directories and move to another directory
Code used
#collects all mp3 files from folders to a new folder
import os
from pathlib import Path
import shutil
#run once
path = os.getcwd()
os.mkdir("empetrishki")
empetrishki = path + "/empetrishki" #destination dir
print(path)
print(empetrishki)
#recursive collection
for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True):
for name in files:
filePath = Path(name)
if filePath.suffix.lower() == ".mp3":
print(filePath)
os.path.join
filePath.rename(empetrishki.joinpath(filePath))
I have trouble with the last line, which moves the files: neither filePath.rename(), shutil.move nor joinpath() has worked for me. Maybe that's because I am trying to change an element of the tuple returned by os.walk.
Similar code works with os.scandir but this would collect files only in the current directory
How can I fix that, thanks!
If you use pathlib.Path(name) that doesn't mean that something exists called name. Hence, you do need to be careful that you have a full path, or relative path, and you need to make sure to resolve those. In particular I am noting that you don't change your working directory and have a line like this:
filePath = Path(name)
This means that while you may be walking down the directory, your working directory may not be changing. You should make your path from the root and the name, it is also a good idea to resolve so that the full path is known.
filePath = Path(root).joinpath(name).resolve()
You can also place the Path(root) outside the inner loop as well. Now you have an absolute path from '/home/' to the filename. Hence, you should be able to rename with .rename(), like:
filePath.rename(x.parent.joinpath(newname))
#Or to another directory
filePath.rename(other_dir.joinpath(newname))
All together:
import os  # import os directly; "from pathlib import os" relies on a pathlib implementation detail
from pathlib import Path

path = Path.cwd()  # tree to search (the snippet used `path` without defining it)
empetrishki = path.joinpath("empetrishki").resolve()  # destination dir
empetrishki.mkdir(exist_ok=True)  # rename() fails if the destination dir is missing

for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True):
    root = Path(root).resolve()
    if root == empetrishki:
        # With topdown=True, emptying dirs in place stops os.walk from
        # descending further; files already collected are left alone.
        dirs[:] = []
        continue
    for name in files:
        file = root.joinpath(name)
        if file.suffix.lower() == ".mp3":
            # NOTE: two files with the same name in different folders end up
            # at the same destination path; on POSIX the later rename
            # silently replaces the earlier file.
            file.rename(empetrishki.joinpath(file.name))
# Walk the tree and move every .mp3 found outside the destination dir into it.
for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True):
    if root != empetrishki:  # files already in the destination dir are skipped
        for name in files:
            extension = os.path.splitext(name)[1]
            if extension.lower() != ".mp3":
                continue
            oldpath = os.path.join(root, name)
            newpath = os.path.join(empetrishki, name)
            print(oldpath)
            shutil.move(oldpath, newpath)
This is what I suggest. Your code is running on the current directory, and the file is at the path os.path.join(root, name) and you need to provide such path to your move function.
Besides, I would also suggest to use os.path.splitext for extracting the file extension. More pythonic. And also you might want to skip scanning your target directory.

Copy files along with file paths from a list to a folder or another path

I have a list containing files along with their paths. I need to copy or move the files with their entire path into another directory or folder.
I have tried the following, but the directory paths are not copied; only the files are.
import shutil
list_l1 = ['/home/Test//A/Aa/hello1.c', '/home/Test/C/Aa/hello1.c', '/home/Test/B/Aa/hello1.c']
for source in list_l1:
shutil.move(source, '/home/Test/sample_try/sample/')
You probably want to use os.makedirs() to make the nested directories. You might want to separate the paths in your list_l1 into directory and filename parts first, and use os.path.exists() to check if the directory exists before attempting to create it.
You could try:
import shutil
import os

list_l1 = ['/home/Test//A/Aa/hello1.c', '/home/Test/C/Aa/hello1.c', '/home/Test/B/Aa/hello1.c']
dest = '/home/Test/sample_try/sample'

# Copy each file to dest, recreating its full original path below dest.
for source in list_l1:
    # os.path.join() discards everything before an absolute component, so
    # normalise the source path and strip its leading separator first.
    relative = os.path.normpath(source).lstrip(os.sep)
    target = os.path.join(dest, relative)
    # exist_ok=True replaces the separate os.path.exists() check and avoids
    # the race between checking for the directory and creating it.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    shutil.copy(source, target)
You can try making the directories first like below or some other library.
import shutil
from pathlib import Path

list_l1 = ['./A/Aa/hello1.c', './B/Aa/hello1.c']
new_parent = './C'

# Move each file below new_parent, keeping its intermediate directories.
for source in list_l1:
    parts = source.split('/')
    # Drop the first component ('.') and the last one (the file name); what
    # remains is the directory chain to recreate.
    dirs = '/'.join(parts[1:-1])
    p = f"{new_parent}/{dirs}/"
    Path(p).mkdir(parents=True, exist_ok=True)
    shutil.move(source, p)

Moving only one file of each sub directories to new sub directories

I have question regarding moving one file in each sub directories to other new sub directories. So for example if I have directory as it shown in the image
And from that, I want to pick only the first file in each sub directories then move it to another new sub directories with the same name as you can see from the image. And this is my expected result
I have tried using os.walk to select the first file of each sub directories, but I still don't know how to move it to another sub directories with the same name
path = './test/'
new_path = './x/'
n = 1
fext = ".png"
for dirpath, dirnames, filenames in os.walk(path):
for filename in [f for f in filenames if f.endswith(fext)][:n]:
print(filename) #this only print the file name in each sub dir
The expected result can be seen in the image above
You are almost there :)
All you need is to have both full path of file: an old path (existing file) and a new path (where you want to move it).
As mentioned in this post, you can move files in different ways in Python. You can use "os.rename" or "shutil.move".
Here is a full tested code-sample:
import os
import shutil


def move_first_files(src_root, dst_root, count=1, ext=".png"):
    """Move the first `count` files ending in `ext` from every directory.

    Each directory below (and including) `src_root` is mirrored under
    `dst_root`, so a file taken from ``src_root/a`` lands in ``dst_root/a``
    rather than all files being dumped flat into `dst_root`.

    File names are sorted before the first `count` are taken, because the
    order in which os.walk lists files is arbitrary.

    Returns the list of destination paths of the files that were moved.
    """
    moved = []
    for dirpath, dirnames, filenames in os.walk(src_root):
        chosen = sorted(f for f in filenames if f.endswith(ext))[:count]
        if not chosen:
            continue  # nothing to move from this directory
        # Directory of the file relative to src_root, recreated under dst_root.
        rel_dir = os.path.relpath(dirpath, src_root)
        target_dir = os.path.normpath(os.path.join(dst_root, rel_dir))
        # Creates missing parents; no error if the directory already exists.
        os.makedirs(target_dir, exist_ok=True)
        for filename in chosen:
            destination = os.path.join(target_dir, filename)
            shutil.move(os.path.join(dirpath, filename), destination)
            moved.append(destination)
    return moved


path = './test/'
new_path = './x/'
n = 1
fext = ".png"
for moved_file in move_first_files(path, new_path, n, fext):
    print(os.path.basename(moved_file))

Python folder names in the directory

how can i get the folder names existing in a directory using Python ?
I want to save all the subfolders into a list to work with the names after that but i dont know how to read the subfolder names ?
Thanks for you help
You can use os.walk()
#!/usr/bin/python
import os

# Collect the full path of every directory anywhere below the start directory.
directory_list = []
for root, dirs, files in os.walk("/path/to/your/dir", topdown=False):
    for name in dirs:
        directory_list.append(os.path.join(root, name))
# print with parentheses and a single argument behaves the same on Python 2
# and Python 3; the bare print statement is a SyntaxError on Python 3.
print(directory_list)
EDIT
If you only want the first level and not actually "walk" through the subdirectories, it is even less code:
import os

# Take only the first result of the walk: the start directory itself.
# The builtin next() works on Python 2.6+ and Python 3; the generator's
# .next() method exists only on Python 2.
# Raises StopIteration if os.walk yields nothing for this path.
root, dirs, files = next(os.walk("/path/to/your/dir"))
print(dirs)
This is not really what os.walk is made for. If you really only want one level of subdirectories, you can also use os.listdir() like Yannik Ammann suggested:
import os

root = '/path/to/my/dir'
# os.listdir returns bare names, so join each with root before testing it.
dirlist = [item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item))]
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(dirlist)
Starting with Python 3.4, you can also use the new pathlib module:
from pathlib import Path
p = Path('some/folder')
subdirectories = [x for x in p.iterdir() if x.is_dir()]
print(subdirectories)
You can use os.listdir(); here is a link to the docs.
Warning: it returns both files and directories.
example:
import os
path = 'pyth/to/dir/'
dir_list = os.listdir(path)
update: you need to check if the returned names are directories or files
import os

path = 'pyth/to/dir/'
# list of all content in a directory, filtered so only directories are returned
# os.path.join inserts the separator when needed, so this also works when
# path has no trailing slash (plain path+directory would not).
dir_list = [directory for directory in os.listdir(path) if os.path.isdir(os.path.join(path, directory))]
You should import os first.
import os

# sorted() already returns a new list, so neither the empty-list
# initialisation nor a wrapping list comprehension is needed.
# os.listdir returns every entry name in the directory.
files = sorted(os.listdir(FileDirectoryPath))
This would give you list with all files in the FileDirectoryPath sorted.
I use os.listdir
Get all folder names of a directory
# Names (not paths) of the entries in MYDIR that are directories.
folder_names = [
    entry_name
    for entry_name in os.listdir(MYDIR)
    if os.path.isdir(os.path.join(MYDIR, entry_name))
]
Get all folder paths of a directory
# Paths (MYDIR joined with the name) of the entries in MYDIR that are directories.
entry_paths = [os.path.join(MYDIR, entry_name) for entry_name in os.listdir(MYDIR)]
folder_paths = [entry_path for entry_path in entry_paths if os.path.isdir(entry_path)]
Get all file names of a directory
# Names (not paths) of the entries in MYDIR that are regular files.
file_names = [
    file_name
    for file_name in os.listdir(MYDIR)
    if os.path.isfile(os.path.join(MYDIR, file_name))
]
Get all file paths of a directory
# Paths (MYDIR joined with the name) of the entries in MYDIR that are regular files.
candidate_paths = [os.path.join(MYDIR, file_name) for file_name in os.listdir(MYDIR)]
file_paths = [file_path for file_path in candidate_paths if os.path.isfile(file_path)]
For python 3 I'm using this script
import os

root = './'
# Keep only the entries of root that are directories.
dirlist = [item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item))]
# The loop variable is not called `dir`, which would shadow the builtin dir().
for dirname in dirlist:
    print(dirname)
Use os.walk(path)
import os

path = 'C:\\'
# Print the full path of every directory anywhere below path.
for root, directories, files in os.walk(path):
    for directory in directories:
        # Parenthesised single-argument print works on Python 2 and 3.
        print(os.path.join(root, directory))
Python 3.x: If you want only the directories in a given directory, try:
import os
search_path = '.' # set your path here.
root, dirs, files = next(os.walk(search_path), ([],[],[]))
print(dirs)
The above example will print out a list of the directories in the current directory like this:
['dir1', 'dir2', 'dir3']
The output contains only the sub-directory names.
If the directory does not have sub-directories, it will print:
[]
os.walk() is a generator, so next() is used to fetch only its first result. The 3-tuple of empty lists is the default that next() returns when the generator yields nothing, which happens when search_path does not exist or cannot be read (os.walk() ignores such errors by default). Without that default, next() would raise a StopIteration exception in that case. A directory that exists but has no sub-directories still yields one 3-tuple, whose list of directories is simply empty.
For a more compact version:
dirs = next(os.walk(search_path), ([],[],[]))[1]

Excluding all but a single subdirectory from a file search

I have a directory structure that resembles the following:
Dir1
Dir2
Dir3
Dir4
L SubDir4.1
L SubDir4.2
L SubDir4.3
I want to generate a list of files (with full paths) that include all the contents of Dirs1-3, but only SubDir4.2 inside Dir4. The code I have so far is
import fnmatch
import os
for root, dirs, files in os.walk( '.' )
if 'Dir4' in dirs:
if not 'SubDir4.2' in 'Dir4':
dirs.remove( 'Dir4' )
for file in files
print os.path.join( root, file )
My problem is that the part where I attempt to exclude any file that does not have SubDir4.2 in its path excludes everything in Dir4, including the things I would like to keep. How should I amend the above to do what I want?
Update 1: I should add that there are a lot of directories below Dir4, so manually listing them in an excludes list isn't a practical option. I'd like to be able to specify SubDir4.2 as the only subdirectory within Dir4 to be read.
Update 2: For reason outside of my control, I only have access to Python version 2.4.3.
There are a few typos in your snippet. I propose this:
import os
def any_p(iterable):
    """Return True as soon as a truthy element is found, otherwise False.

    Stand-in for the builtin any(), which does not exist before Python 2.5.
    An empty iterable gives False.
    """
    for item in iterable:
        if not item:
            continue
        return True
    return False
include_dirs = ['Dir4/SubDir4.2', 'Dir1/SubDir4.2', 'Dir3', 'Dir2'] # List all your included folder names in that
for root, dirs, files in os.walk( '.' ):
    # Assigning to dirs[:] changes the list os.walk itself iterates over, so
    # directories filtered out here are never visited.
    # NOTE(review): `d in os.path.join(...)` is a substring test on the joined
    # path, not a path-component comparison - confirm that this is intended.
    dirs[:] = [d for d in dirs if any_p(d in os.path.join(root, q_inc) for q_inc in include_dirs)]
    for filename in files:
        # The question asks for full paths, so join with root. Parenthesised
        # single-argument print works on Python 2 and Python 3.
        print(os.path.join(root, filename))
EDIT: According to comments, I have changed that so this is include list, instead of an exclude one.
EDIT2: Added a any_p (any() equivalent function for python version < 2.5)
EDIT3bis: if you have other subfolders with the same name 'SubDir4.2' in other folders, you can use the following to specify the location:
include_dirs = ['Dir4/SubDir4.2', 'Dir1/SubDir4.2']
Assuming you have a Dir1/SubDir4.2.
If they are a lot of those, then you may want to refine this approach with fnmatch, or probably a regex query.
I altered mstud's solution to give you what you are looking for:
import os

for root, dirs, files in os.walk('.'):
    # Split the root into its path parts
    tmp = root.split(os.path.sep)
    # If the path is long enough to be inside Dir4 AND
    # the second to last part of the path is Dir4 AND
    # the last part of the path is NOT SubDir4.2 THEN
    # stop processing this pass.
    if (len(tmp) > 2) and (tmp[-2] == 'Dir4') and (tmp[-1] != 'SubDir4.2'):
        # Emptying dirs in place also stops os.walk from descending; with
        # `continue` alone, files nested deeper below this excluded
        # subdirectory would still be printed.
        dirs[:] = []
        continue
    # If we aren't in Dir4 itself, print the file paths.
    if tmp[-1] != 'Dir4':
        for name in files:
            # Parenthesised single-argument print works on Python 2 and 3.
            print(os.path.join(root, name))
In short, the first "if" skips the printing of any directory contents under Dir4 that aren't SubDir4.2. The second "if" skips the printing of the contents of the Dir4 directory.
import os

for root, dirs, files in os.walk('.'):
    tmp = root.split(os.path.sep)
    # Skip every subdirectory of Dir4 except SubDir4.2. The test must be
    # `!=`: with `==` the one subdirectory that should be kept was skipped
    # and all the others were printed.
    if len(tmp) > 2 and tmp[-2] == "Dir4" and tmp[-1] != "SubDir4.2":
        dirs[:] = []  # do not descend into the excluded subdirectory either
        continue
    for name in files:
        # Parenthesised single-argument print works on Python 2 and 3.
        print(os.path.join(root, name))

Categories