python: collect files with one extension from all sub-dir - python

I am trying to collect all files from all sub-directories and move them to another directory.
Code used
#collects all mp3 files from folders to a new folder
import os
from pathlib import Path
import shutil
#run once
path = os.getcwd()
os.mkdir("empetrishki")
empetrishki = path + "/empetrishki" #destination dir
print(path)
print(empetrishki)
#recursive collection
for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True):
for name in files:
filePath = Path(name)
if filePath.suffix.lower() == ".mp3":
print(filePath)
os.path.join
filePath.rename(empetrishki.joinpath(filePath))
I have trouble with the last line, which moves the files: neither filePath.rename() nor shutil.move nor joinpath() has worked for me. Maybe that's because I am trying to change an element of the tuple that os.walk outputs.
Similar code works with os.scandir but this would collect files only in the current directory
How can I fix that, thanks!

If you use pathlib.Path(name) that doesn't mean that something exists called name. Hence, you do need to be careful that you have a full path, or relative path, and you need to make sure to resolve those. In particular I am noting that you don't change your working directory and have a line like this:
filePath = Path(name)
This means that while you may be walking down the directory, your working directory may not be changing. You should make your path from the root and the name, it is also a good idea to resolve so that the full path is known.
filePath = Path(root).joinpath(name).resolve()
You can also place the Path(root) outside the inner loop as well. Now you have an absolute path from '/home/' to the filename. Hence, you should be able to rename with .rename(), like:
# Rename in place: the file stays in its own directory under a new name.
filePath.rename(filePath.parent.joinpath(newname))
# Or move it to another directory (other_dir is a pathlib.Path).
filePath.rename(other_dir.joinpath(newname))
All together:
import os
from pathlib import Path

# Walk from the current working directory; the destination sits directly under it.
path = Path.cwd()
empetrishki = path.joinpath("empetrishki").resolve()
# exist_ok lets the script be re-run without failing on an existing destination.
empetrishki.mkdir(exist_ok=True)

for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True):
    root = Path(root).resolve()
    if root == empetrishki:
        # Files already collected must not be visited again; clearing dirs
        # in place (possible because topdown=True) also stops any descent
        # below the destination.
        dirs[:] = []
        continue
    for name in files:
        file = root.joinpath(name)
        if file.suffix.lower() == ".mp3":
            # file.name drops the directory part, so every match lands
            # directly inside the destination directory.
            file.rename(empetrishki.joinpath(file.name))

# Normalise once so the comparison below does not depend on which separator
# was used when the destination path was built.
destination = os.path.normpath(empetrishki)

for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True):
    # Prune the destination in place (allowed because topdown=True) so that
    # os.walk never enters it or anything below it.
    dirs[:] = [
        d for d in dirs
        if os.path.normpath(os.path.join(root, d)) != destination
    ]
    for name in files:
        extension = os.path.splitext(name)[1]
        if extension.lower() == ".mp3":
            # name is a bare file name; the real location is root + name.
            oldpath = os.path.join(root, name)
            newpath = os.path.join(empetrishki, name)
            print(oldpath)
            shutil.move(oldpath, newpath)
This is what I suggest. Your code is running on the current directory, and the file is at the path os.path.join(root, name) and you need to provide such path to your move function.
Besides, I would also suggest to use os.path.splitext for extracting the file extension. More pythonic. And also you might want to skip scanning your target directory.

Related

Check multiple files exist for each folders

I am looking for a way to print out the files that do not exist in the directories.
So far I could do
QA_files_pattern = '*QA.xlsx'
EP_files_pattern = '*EP.xlsx'
AD_files_pattern = '*AD.xlsx'
filelist = [QA_files_pattern,EP_files_pattern,AD_files_pattern]
path = os.path.abspath(os.getcwd())
for (path, dir, files) in os.walk(path): # Get all files in current file's path
for a_file in filelist:
if fnmatch.filter(os.listdir(path), a_file):
print(fnmatch.filter(os.listdir(path), a_file))
else:
print("missing"+path+a_file)
The problem with this approach is that it checks whether the file patterns exist not only in the folders I want to look in, but also in every directory it passes through to reach those folders.
Will there be a way to make it search just the end path?
EDIT: I cannot specify how many subfolders there will be, but I am sure the files are only contained at the end folders and the common name for end folder is 'QS'
You can set your path so that it is explicitly on the file path you desire.
Right now you are setting the path on the current directory, which I believe is the root.
path = os.path.abspath(os.getcwd())
You could make the path more explicit to limit the search:
path = os.path.abspath(os.getcwd())
extended_path= os.path.join(path, "specific_directory", "subdirectory", "etc")
And then sub in the extended_path
# Walk only the narrowed-down subtree. Distinct loop names keep
# extended_path intact and avoid shadowing the builtin dir().
for (current_dir, subdirs, files) in os.walk(extended_path):  # Get all files below extended_path
    for a_file in filelist:
        # Filter once and reuse the result instead of listing the directory twice.
        matches = fnmatch.filter(os.listdir(current_dir), a_file)
        if matches:
            print(matches)
        else:
            print("missing"+current_dir+a_file)
You can use glob recursively for something like:
from glob import glob
from os import path

QA = "*QA.xlsx"
EP = "*EP.xlsx"
AD = "*AD.xlsx"
base_path = "/base/path/**/*"  # change /base/path to the dir you want to check

# recursive=True lets "**" match any depth of sub-directories.
for d in glob(base_path, recursive=True):  # get all files/dirs recursively inside base_path
    if not path.isdir(d):
        continue  # only directories are checked for the expected files
    to_match = {
        "QA": glob(f"{d}/{QA}"),
        "EP": glob(f"{d}/{EP}"),
        "AD": glob(f"{d}/{AD}"),
    }
    for k, v in to_match.items():
        if not v:  # empty list: nothing in d matched this pattern
            print(f"Dir '{d}' Missing {k}")
Ok so I was able to solve for my own question, which was much simpler way than I expected
QA_files_pattern = '*QA.xlsx'
EP_files_pattern = '*EP.xlsx'
AD_files_pattern = '*AD.xlsx'
filelist = [QA_files_pattern, EP_files_pattern, AD_files_pattern]
path = os.path.abspath(os.getcwd())
# Distinct loop names keep `path` intact and avoid shadowing the builtin dir().
for (current_dir, subdirs, files) in os.walk(path):  # Get all files in current file's path
    if "QS" not in current_dir:
        continue  # only directories whose path mentions "QS" are checked
    for a_file in filelist:
        # Filter once and reuse the result instead of listing the directory twice.
        matches = fnmatch.filter(os.listdir(current_dir), a_file)
        if matches:
            print(matches)
        else:
            print("missing"+current_dir+a_file)

Get customize Path of File found with os.walk

I am trying to expose the selective path of a file, found using os.walk. The walk function is working correctly and is exposing all the files I want however right now I am able to expose either only the file name or full path of the file.
path = 'C:/Users/testing_recurssion'
for root, d_names, f_names in os.walk(path):
for name in f_names:
print(os.path.join(root, name))
This returns
C:/Users/testing_recurssion\folder1\file3.txt
C:/Users/testing_recurssion\folder1\folder3\file4.txt
However, I want it to return
folder1\file3.txt
folder1\folder3\file4.txt
Use os.path.relpath
directory = "C:/Users/testing_recurssion"
for root, d_names, f_names in os.walk(directory):
    for name in f_names:
        path = os.path.join(root, name)
        # relpath expresses the full path relative to the starting directory
        print(os.path.relpath(path, directory))
Also - consider using pathlib.Path which is more recent and object oriented:
from pathlib import Path

directory = Path("C:/Users/testing_recurssion")
# rglob("*") yields directories as well as files; keep only the files so the
# output matches what the os.walk version prints.
for path in directory.rglob("*"):
    if path.is_file():
        print(path.relative_to(directory))

Find the sub-paths of files with specific extension in python

I am trying to locate all my files with the extension mp4 in a folder (and its subfolders) and copy them to another dir. I managed to find all files with the extension mp4, however I didn't manage to keep the dir of those files. My code is the following:
import os
from shutil import copyfile
path = "videos/"
for root, dirs, files in os.walk(path):
for name in files:
if name.endswith((".mp4", ".mp4")):
print(name)
# copyfile(src, dst)
I want to find the path of the name (corresponding to my vids). How can I do so?
Use os.path.join()
import os
from shutil import copyfile

path = "videos/"
for root, dirs, files in os.walk(path):
    for name in files:
        if name.endswith(".mp4"):
            # root is the directory currently being walked; joining it with
            # the bare file name gives the file's path starting at `path`.
            print(os.path.join(root, name))
            # copyfile(src, dst)
Although it is considered better to use absolute paths, you can use os.path.relpath if you want a relative path. From the os.path.relpath documentation:
os.path.relpath(path[, start])
Return a relative filepath to path either from the current directory or from an optional start directory. This is a path computation: the filesystem is not accessed to confirm the existence or nature of path or start.
start defaults to os.curdir.
Availability: Windows, Unix.
New in version 2.6.
Why not just use glob:
import glob
import os
import shutil

for file in glob.iglob('/foo/*.mp4'):
    # iglob returns the full path ('/foo/name.mp4'), so only the base name
    # may be appended to the destination directory.
    shutil.copy2(file, os.path.join('/bar', os.path.basename(file)))
From the documentation on os.walk:
dirpath is a string, the path to the directory. dirnames is a list of the names of the subdirectories in dirpath (excluding '.' and '..'). filenames is a list of the names of the non-directory files in dirpath. Note that the names in the lists contain no path components. To get a full path (which begins with top) to a file or directory in dirpath, do os.path.join(dirpath, name).
So your code should look like this:
import os
from shutil import copyfile

path = "videos/"
dst_root = "videos_copy/"  # destination tree; change to wherever the copies should go

for root, dirs, files in os.walk(path):
    for name in files:
        if name.endswith(".mp4"):
            print(name)
            src = os.path.join(root, name)
            # Recreate the file's sub-directory under dst_root so the original
            # folder layout is kept.
            dst = os.path.join(dst_root, os.path.relpath(src, path))
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            copyfile(src, dst)

Find files, copy to new directory python

I would like to:
Write a script that takes a single directory path as command line argument, and then walks all subdirectories of that path looking for files with the extension '.py', copying each one to a temporary directory in your file system (eg /tmp/pyfiles). Your script should check for the existence of the temporary directory, and remove it if it already exists; it should then create a new directory, before beginning to copy files.
I have this:
#!/usr/bin/env python
import os, sys
import shutil
#import module
rootdir = sys.argv[1]
#take input directory
if os.path.exists('tmp/pyfiles'):
shutil.rmtree('tmp/pyfiles')
if not os.path.exists('tmp/pyfiles'):
os.makedirs('tmp/pyfiles')
#check whether directory exists, if it exists remove and remake, if not make
for root, dirs, files in os.walk(rootdir):
for f in files:
if os.path.splitext(f)[1] in ['.py']:
shutil.copy2(f, tmp/pyfiles)
#find files ending with .py, copy them and place in tmp/pyfiles directory
I get this error:
Traceback (most recent call last):
File "seek.py", line 20, in <module>
shutil.copy2(f, tmp/pyfiles)
NameError: name 'tmp' is not defined
Could anyone help me out?:) Thx
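Your code says shutil.copy2(f, tmp/pyfiles); I believe it was meant to be
shutil.copy2(f, 'tmp/pyfiles'). Note that f is only the bare file name, so the source argument should be os.path.join(root, f).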
When you use the
os.walk()
method you lose track of the file's full path. What I would do is to analyze each directory using the
os.listdir()
method and then copying each file taking into account its absolute path. Something like this:
for root, dirs, files in os.walk(rootdir):
    # List the directory being visited (rootdir itself included) and build
    # each entry's path from `root`, so it does not depend on the current
    # working directory.
    for f in os.listdir(root):
        full_path = os.path.join(root, f)
        # listdir also returns sub-directories; copy regular files only.
        if os.path.splitext(f)[1] in ['.py'] and os.path.isfile(full_path):
            shutil.copy2(full_path, "tmp/pyfiles")
I hope this helps, maybe there is a cleaner solution.
You have to check if the root directory exists and walk in to remove everything if not create a new one.
To copy from dir to yours you have to check for files in dir that filename ends with .py, then replace the dir path with root path and create a new file in root dir with the content of matching file.
If we found a directory in dir we should create a new one in the root directory.
After that just call the function recursively to copy all content of dir to the root directory
import os, sys
rootdir = sys.argv[1]
PATH = "/tmp/pyfiles/"
def check(path, _file):
    """Recursively copy files below *path* whose names end with *_file*.

    The layout relative to the module-level ``rootdir`` is recreated under
    the module-level ``PATH``: each sub-directory is created there with
    ``os.mkdir`` and each matching file is copied as text.

    Args:
        path: Directory to scan; expected to be ``rootdir`` or below it.
        _file: Suffix a file name must end with to be copied (e.g. '.py').
    """
    for item in os.listdir(path):
        newpath = os.path.join(path, item)
        # relpath never starts with a separator, so os.path.join cannot
        # discard PATH the way it does when given an absolute component.
        target = os.path.join(PATH, os.path.relpath(newpath, rootdir))
        if os.path.isdir(newpath):
            os.mkdir(target)
            check(newpath, _file)
        elif item.endswith(_file):
            print(target)
            # with-blocks close both files even if the copy fails midway.
            with open(newpath, 'r') as source, open(target, 'w') as output:
                output.write(source.read())
if __name__ == '__main__':
    import shutil  # local import: only this entry point needs it

    # Start from an empty destination: rmtree removes PATH and everything
    # below it in one call, replacing the manual bottom-up walk.
    if os.path.isdir(PATH):
        shutil.rmtree(PATH)
    os.mkdir(PATH)
    check(rootdir, '.py')

All Files in Dir & Sub-Dir

I would like to find all the files in a directory and all sub-directories.
code used:
import os
import sys
path = "C:\\"
dirs = os.listdir(path)
filename = "C.txt"
FILE = open(filename, "w")
FILE.write(str(dirs))
FILE.close()
print dirs
The problem is - this code only lists files in directories, not sub-directories. What do I need to change in order to also list files in subdirectories?
To traverse a directory tree you want to use os.walk() for this.
Here's an example to get you started:
import os

searchdir = r'C:\root_dir'  # traversal starts in this directory (the root)
for root, dirs, files in os.walk(searchdir):
    for name in files:
        (base, ext) = os.path.splitext(name)  # split base and extension
        # %-formatting prints "base ext" identically on Python 2 and 3.
        print("%s %s" % (base, ext))
which would give you access to the file names and the components.
You'll find the functions in the os and os.path module to be of great use for this sort of work.
This function will help you: os.path.walk() http://docs.python.org/library/os.path.html#os.path.walk (Python 2 only; it was removed in Python 3, where os.walk() should be used instead)

Categories