Related
The loop is working but once I put the if statements in it only prints I am a dir
If the if statements are not there I am able to print the dirpath, dirname, filename to the console
I am trying to list all the file names in a directory and get the MD5 sum.
from os import walk
import hashlib
import os
path = "/home/Desktop/myfile"
for (dirpath, dirname, filename) in walk(path):
if os.path.isdir(dirpath):
print("I am a dir")
if os.path.isfile(dirpath):
print(filename, hashlib.md5(open(filename, 'rb').read()).digest())
You're only checking dirpath. What you have as dirname and filename are actually collections of directory names and files under dirpath. Taken from the python docs, and modified slightly, as their example removes the files:
import os
# Print every file and every sub-directory found below `top`, one full path
# per line. `files` and `dirs` are lists of bare names relative to `root`.
for root, dirs, files in os.walk(top):
    for name in files:
        print(os.path.join(root, name))
    for name in dirs:
        print(os.path.join(root, name))
This prints the files and directories directly under top, then recurses into each subdirectory of top and prints the files and directories found there.
From the Python documentation about os.walk:
https://docs.python.org/2/library/os.html
dirpath is a string, the path to the directory. dirnames is a list of
the names of the subdirectories in dirpath (excluding '.' and '..').
filenames is a list of the names of the non-directory files in
dirpath.
With os.path.isfile(dirpath) you are checking whether dirpath is a file, which is never the case. Try changing the code to:
# os.walk yields (dirpath, dirnames, filenames); the last two are *lists* of
# bare names, so each file name has to be joined with dirpath individually.
for dirpath, dirnames, filenames in walk(path):
    for filename in filenames:
        full_filename = os.path.join(dirpath, filename)
        if os.path.isfile(full_filename):
            # Close the file promptly and print the conventional hex MD5 sum.
            with open(full_filename, 'rb') as handle:
                print(full_filename, hashlib.md5(handle.read()).hexdigest())
I'm having troubles finding and deleting empty folders with my Python script.
I have some directories with files more or less like this:
A/
--B/
----a.txt
----b.pdf
--C/
----d.pdf
I'm trying to delete all files which aren't PDFs and after that delete all empty folders. I can delete the files that I want to, but then I can't get the empty directories. What am I doing wrong?
os.chdir(path+"/"+name+"/Test Data/Checklists")
pprint("Current path: "+ os.getcwd())
for root, dirs, files in os.walk(path+"/"+name+"/Test Data/Checklists"):
for name in files:
if not(name.endswith(".pdf")):
os.remove(os.path.join(root, name))
pprint("Deletting empty folders..")
pprint("Current path: "+ os.getcwd())
for root, dirs, files in os.walk(path+"/"+name+"/Test Data/Checklists", topdown=False):
if not dirs and not files:
os.rmdir(root)
Use the following function instead:
os.removedirs(path)
This removes the given directory and then each parent directory in turn, stopping at the first parent that is not empty.
Ideally, you should remove the directories immediately after deleting the files, rather than doing two passes with os.walk
import sys
import os
# Walk bottom-up so that children are processed (and possibly removed) before
# their parent is inspected.
for current_dir, subdirs, files in os.walk(sys.argv[1], topdown=False):
    for name in files:
        if not name.endswith(".pdf"):
            os.remove(os.path.join(current_dir, name))
    # check whether the directory is now empty after deletions, and if so, remove it
    if not os.listdir(current_dir):
        os.rmdir(current_dir)
For empty folders deletion you can use this snippet.
It can be combined with file deletion, but it should then be run, as is, as the last step.
import os
def drop_empty_folders(directory):
    """Remove every empty folder under *directory*, including *directory* itself.

    Folders that become empty only because their empty sub-folders were just
    removed are removed as well.

    Args:
        directory: Path of the tree to prune.

    Raises:
        OSError: If an empty folder cannot be removed.
    """
    for dirpath, _dirnames, _filenames in os.walk(directory, topdown=False):
        # The name lists yielded by os.walk are collected before the walk
        # descends, so they still mention sub-folders removed a moment ago.
        # Ask the filesystem for the current contents instead.
        if not os.listdir(dirpath):
            os.rmdir(dirpath)
remove all empty folders
import os
folders = './A/'  # directory
# Walk bottom-up and remove each directory that is empty at that moment.
# os.rmdir is used instead of os.removedirs so that nothing above `folders`
# is ever removed, even if a parent directory happens to become empty.
for dirpath, _dirnames, _filenames in os.walk(folders, topdown=False):
    if not os.listdir(dirpath):
        os.rmdir(dirpath)
how can i get the folder names existing in a directory using Python ?
I want to save all the subfolders into a list to work with the names after that but i dont know how to read the subfolder names ?
Thanks for you help
You can use os.walk()
# !/usr/bin/python
import os
# Collect the full path of every sub-directory (at any depth) below the root.
directory_list = []
for root, dirs, files in os.walk("/path/to/your/dir", topdown=False):
    directory_list.extend(os.path.join(root, name) for name in dirs)
print(directory_list)
EDIT
If you only want the first level and not actually "walk" through the subdirectories, it is even less code:
import os
# Take only the first tuple produced by os.walk (the top-level directory).
# The default keeps next() from raising StopIteration when the walk yields
# nothing, e.g. because the path does not exist.
root, dirs, files = next(os.walk("/path/to/your/dir"), ("", [], []))
print(dirs)
This is not really what os.walk is made for. If you really only want one level of subdirectories, you can also use os.listdir() like Yannik Ammann suggested:
root = '/path/to/my/dir'
# Keep only the entries of `root` that are directories.
dirlist = [item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item))]
print(dirlist)
Starting with Python 3.4, you can also use the new pathlib module:
from pathlib import Path
p = Path('some/folder')
subdirectories = [x for x in p.iterdir() if x.is_dir()]
print(subdirectories)
You can use os.listdir(); see the Python documentation of the os module for details.
Warning: it returns both files and directories.
example:
import os
path = 'pyth/to/dir/'
dir_list = os.listdir(path)
update: you need to check if the returned names are directories or files
import os
path = 'pyth/to/dir/'
# list of all content in a directory, filtered so only directories are returned
# (os.path.join works whether or not `path` ends with a separator)
dir_list = [entry for entry in os.listdir(path) if os.path.isdir(os.path.join(path, entry))]
You should import os first.
import os
# sorted() already returns a new list, so no pre-initialisation or extra
# comprehension is needed.
files = sorted(os.listdir(FileDirectoryPath))
This would give you list with all files in the FileDirectoryPath sorted.
I use os.listdir
Get all folder names of a directory
# Names (not paths) of the directories directly inside MYDIR.
folder_names = [
    entry_name
    for entry_name in os.listdir(MYDIR)
    if os.path.isdir(os.path.join(MYDIR, entry_name))
]
Get all folder paths of a directory
# Paths of the directories directly inside MYDIR, each prefixed with MYDIR.
folder_paths = [
    entry_path
    for entry_path in (os.path.join(MYDIR, entry_name) for entry_name in os.listdir(MYDIR))
    if os.path.isdir(entry_path)
]
Get all file names of a directory
# Names (not paths) of the regular files directly inside MYDIR.
file_names = [
    file_name
    for file_name in os.listdir(MYDIR)
    if os.path.isfile(os.path.join(MYDIR, file_name))
]
Get all file paths of a directory
# Paths of the regular files directly inside MYDIR, each prefixed with MYDIR.
file_paths = [
    file_path
    for file_path in (os.path.join(MYDIR, file_name) for file_name in os.listdir(MYDIR))
    if os.path.isfile(file_path)
]
For python 3 I'm using this script
import os
root = './'
# Keep only the entries of `root` that are directories, then print each name.
dirlist = [item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item))]
for dirname in dirlist:
    print(dirname)
Use os.walk(path)
import os
path = 'C:\\'
# Print the full path of every directory found anywhere below `path`.
for root, directories, files in os.walk(path):
    for directory in directories:
        print(os.path.join(root, directory))
Python 3.x: If you want only the directories in a given directory, try:
import os
search_path = '.' # set your path here.
root, dirs, files = next(os.walk(search_path), ([],[],[]))
print(dirs)
The above example will print out a list of the directories in the current directory like this:
['dir1', 'dir2', 'dir3']
The output contains only the sub-directory names.
If the directory does not have sub-directories, it will print:
[]
os.walk() is a generator function, so use next() to fetch only its first result. The 3-tuple of empty lists is the default that next() returns when the generator yields nothing at all, for example when search_path does not exist or cannot be read; otherwise os.walk() yields one 3-tuple for each directory in the tree. Without that default, next() would raise a StopIteration exception in that case. A directory that exists but has no sub-directories still yields one tuple, whose dirs list is simply empty.
For a more compact version:
dirs = next(os.walk(search_path), ([],[],[]))[1]
How do I get the absolute paths of all the files in a directory that could have many sub-folders in Python?
I know os.walk() recursively gives me a list of directories and files, but that doesn't seem to get me what I want.
os.path.abspath makes sure a path is absolute. Use the following helper function:
import os
def absoluteFilePaths(directory):
    """Yield the absolute path of every file found under *directory*.

    The tree is walked recursively with os.walk; directories themselves are
    not yielded.

    Args:
        directory: Root of the tree to scan; may be relative or absolute.

    Yields:
        One absolute path string per file.
    """
    for dirpath, _, filenames in os.walk(directory):
        for f in filenames:
            yield os.path.abspath(os.path.join(dirpath, f))
If you have Python 3.4 or newer you can use pathlib (or a third-party backport if you have an older Python version):
import pathlib
# '**/*' matches directories as well as files, so keep only the files.
for filepath in pathlib.Path(directory).glob('**/*'):
    if filepath.is_file():
        print(filepath.absolute())
If the argument given to os.walk is absolute, then the root dir names yielded during iteration will also be absolute. So, you only need to join them with the filenames:
import os
# Because the walk starts from an absolute path, every `root` it yields is
# absolute too, so joining it with a file name gives an absolute path.
for root, dirs, files in os.walk(os.path.abspath("../path/to/dir/")):
    for file in files:
        print(os.path.join(root, file))
Try:
import os
# For each file below the current directory, print its relative path, its
# absolute path, and a blank separator line.
for root, dirs, files in os.walk('.'):
    for file in files:
        p = os.path.join(root, file)
        print(p)
        print(os.path.abspath(p))
        print()
You can use os.path.abspath() to turn relative paths into absolute paths:
# Absolute path of every file found anywhere below rootdir.
file_paths = []
for folder, subs, files in os.walk(rootdir):
    for filename in files:
        file_paths.append(os.path.abspath(os.path.join(folder, filename)))
Starting with python 3.5 the idiomatic solution would be:
import os
def absolute_file_paths(directory):
    """Return the absolute paths of the files directly inside *directory*.

    Only the top level is listed; sub-directories are neither returned nor
    descended into.

    Args:
        directory: Directory to list; may be relative or absolute.

    Returns:
        A list of absolute path strings, in the order os.scandir yields them.
    """
    path = os.path.abspath(directory)
    # Use scandir as a context manager so its directory handle is released
    # immediately instead of whenever the iterator is garbage-collected.
    with os.scandir(path) as entries:
        return [entry.path for entry in entries if entry.is_file()]
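This not only reads more nicely but is also faster in many cases. Note that os.scandir lists only the top level of the directory; it does not descend into sub-folders.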
For more details (like ignoring symlinks) see original python docs:
https://docs.python.org/3/library/os.html#os.scandir
All files and folders:
x = [os.path.abspath(os.path.join(directory, p)) for p in os.listdir(directory)]
Images (.jpg | .png):
x = [os.path.abspath(os.path.join(directory, p)) for p in os.listdir(directory) if p.endswith(('jpg', 'png'))]
import os
from glob import glob


def absolute_file_paths(directory):
    """Return the absolute paths of all files under *directory*, recursively.

    Args:
        directory: Root of the tree to scan; may be relative or absolute.

    Returns:
        A list of absolute path strings. Directories are excluded, and names
        starting with a dot are not matched by the glob pattern.
    """
    # "**" only descends into sub-directories when recursive=True is given.
    pattern = os.path.join(directory, "**")
    return [
        os.path.abspath(match)
        for match in glob(pattern, recursive=True)
        if os.path.isfile(match)
    ]
Try:
from pathlib import Path
path = 'Desktop'
# Lazily select the regular files directly inside `path` (no recursion).
files = (entry for entry in Path(path).glob('*') if entry.is_file())
for file in files:
    print(file.absolute())
I wanted to keep the subdirectory details and not the files and wanted only subdirs with one xml file in them. I can do it this way:
# Report every sub-directory (at any depth) that contains exactly one .xml file.
for rootDirectory, subDirectories, files in os.walk(eventDirectory):
    for subDirectory in subDirectories:
        absSubDir = os.path.join(rootDirectory, subDirectory)
        if len(glob.glob(os.path.join(absSubDir, "*.xml"))) == 1:
            print("Parsing information in " + absSubDir)
# Print every sub-directory path, then the name and full path of every
# file whose name ends with ".JPG" (case-sensitive).
for root, directories, filenames in os.walk(directory):
    # A separate loop name avoids rebinding `directory`, the root being walked.
    for subdirectory in directories:
        print(os.path.join(root, subdirectory))
    for filename in filenames:
        if filename.endswith(".JPG"):
            print(filename)
            print(os.path.join(root, filename))
Try This
pth = ''  # placeholder: set this to the directory that holds one sub-folder per type
types = os.listdir(pth)
# The accumulator must exist before the loop can extend it.
train_folder = []
for type_ in types:
    # Build "<pth>/<type>/<file>" for every entry of this type's sub-folder.
    file_names = [f'{pth}/{type_}/{x}' for x in os.listdir(f'{pth}/{type_}')]
    train_folder += file_names
I'd like to copy the files that have a specific file extension to a new folder. I have an idea how to use os.walk but specifically how would I go about using that? I'm searching for the files with a specific file extension in only one folder (this folder has 2 subdirectories but the files I'm looking for will never be found in these 2 subdirectories so I don't need to search in these subdirectories). Thanks in advance.
import glob, os, shutil
# Copy every regular file matching *.ext from source_dir (top level only)
# into dest_dir; directories that happen to match the pattern are skipped.
files = glob.iglob(os.path.join(source_dir, "*.ext"))
for file in files:
    if os.path.isfile(file):
        shutil.copy2(file, dest_dir)
Read the documentation of the shutil module to choose the function that fits your needs (shutil.copy(), shutil.copy2() or shutil.copyfile()).
If you're not recursing, you don't need walk().
Federico's answer with glob is fine, assuming you aren't going to have any directories called ‘something.ext’. Otherwise try:
import os, shutil
# Copy every regular file ending in '.ext' from srcdir (top level only)
# into dstdir; directories with a matching name are skipped.
for basename in os.listdir(srcdir):
    if basename.endswith('.ext'):
        pathname = os.path.join(srcdir, basename)
        if os.path.isfile(pathname):
            shutil.copy2(pathname, dstdir)
Here is a non-recursive version with os.walk:
import fnmatch, os, shutil
def copyfiles(srcdir, dstdir, filepattern):
    """Copy files in *srcdir* whose names match *filepattern* into *dstdir*.

    Only the top level of *srcdir* is examined; sub-directories are not
    searched.

    Args:
        srcdir: Directory to copy from.
        dstdir: Destination passed to shutil.copy2 for every matching file.
        filepattern: fnmatch-style pattern, e.g. "*.ext".

    Raises:
        OSError: If *srcdir* cannot be listed, or a copy fails.
    """
    def failed(exc):
        # os.walk ignores listing errors unless an onerror handler re-raises.
        raise exc

    for dirpath, dirs, files in os.walk(srcdir, topdown=True, onerror=failed):
        for file in fnmatch.filter(files, filepattern):
            shutil.copy2(os.path.join(dirpath, file), dstdir)
        break  # no recursion
Example:
copyfiles(".", "test", "*.ext")
This will walk a tree with sub-directories. You can do an os.path.isfile check to make it a little safer.
# Copy every file whose name ends in ".jpg" (case-insensitive) from anywhere
# below srcDir into dest. Files with the same name overwrite one another.
for root, dirs, files in os.walk(srcDir):
    for file in files:
        if file.lower().endswith('.jpg'):
            shutil.copy(os.path.join(root, file), os.path.join(dest, file))
Copy files with extension "extension" from srcDir to dstDir...
import os, shutil, sys
# Command-line arguments: source directory, destination directory, extension.
srcDir = sys.argv[1]
dstDir = sys.argv[2]
extension = sys.argv[3]
print("Source Dir: ", srcDir, "\n", "Destination Dir: ", dstDir, "\n", "Extension: ", extension)
# Copy every file ending with `extension` from anywhere below srcDir into dstDir.
for root, dirs, files in os.walk(srcDir):
    for file_ in files:
        if file_.endswith(extension):
            shutil.copy(os.path.join(root, file_), os.path.join(dstDir, file_))