Get absolute paths of all files in a directory - python

How do I get the absolute paths of all the files in a directory that could have many sub-folders in Python?
I know os.walk() recursively gives me a list of directories and files, but that doesn't seem to get me what I want.

os.path.abspath makes sure a path is absolute. Use the following helper function:
import os
def absoluteFilePaths(directory):
for dirpath,_,filenames in os.walk(directory):
for f in filenames:
yield os.path.abspath(os.path.join(dirpath, f))

If you have Python 3.4 or newer you can use pathlib (or a third-party backport if you have an older Python version):
import pathlib
for filepath in pathlib.Path(directory).glob('**/*'):
print(filepath.absolute())

If the argument given to os.walk is absolute, then the root dir names yielded during iteration will also be absolute. So, you only need to join them with the filenames:
import os
for root, dirs, files in os.walk(os.path.abspath("../path/to/dir/")):
for file in files:
print(os.path.join(root, file))

Try:
import os
for root, dirs, files in os.walk('.'):
for file in files:
p=os.path.join(root,file)
print p
print os.path.abspath(p)
print

You can use os.path.abspath() to turn relative paths into absolute paths:
file_paths = []
for folder, subs, files in os.walk(rootdir):
for filename in files:
file_paths.append(os.path.abspath(os.path.join(folder, filename)))

Starting with python 3.5 the idiomatic solution would be:
import os
def absolute_file_paths(directory):
path = os.path.abspath(directory)
return [entry.path for entry in os.scandir(path) if entry.is_file()]
This not just reads nicer but also is faster in many cases.
For more details (like ignoring symlinks) see original python docs:
https://docs.python.org/3/library/os.html#os.scandir

All files and folders:
x = [os.path.abspath(os.path.join(directory, p)) for p in os.listdir(directory)]
Images (.jpg | .png):
x = [os.path.abspath(os.path.join(directory, p)) for p in os.listdir(directory) if p.endswith(('jpg', 'png'))]

from glob import glob
def absolute_file_paths(directory):
return glob(join(directory, "**"))

Try:
from pathlib import Path
path = 'Desktop'
files = filter(lambda filepath: filepath.is_file(), Path(path).glob('*'))
for file in files:
print(file.absolute())

I wanted to keep the subdirectory details and not the files and wanted only subdirs with one xml file in them. I can do it this way:
for rootDirectory, subDirectories, files in os.walk(eventDirectory):
for subDirectory in subDirectories:
absSubDir = os.path.join(rootDirectory, subDirectory)
if len(glob.glob(os.path.join(absSubDir, "*.xml"))) == 1:
print "Parsing information in " + absSubDir

for root, directories, filenames in os.walk(directory):
for directory in directories:
print os.path.join(root, directory)
for filename in filenames:
if filename.endswith(".JPG"):
print filename
print os.path.join(root,filename)

Try This
pth=''
types=os.listdir(pth)
for type_ in types:
file_names=os.listdir(f'{pth}/{type_}')
file_names=list(map(lambda x:f'{pth}/{type_}/{x}',file_names))
train_folder+=file_names

Related

Find and copy files from directories using wildcard [duplicate]

This is what I have:
glob(os.path.join('src','*.c'))
but I want to search the subfolders of src. Something like this would work:
glob(os.path.join('src','*.c'))
glob(os.path.join('src','*','*.c'))
glob(os.path.join('src','*','*','*.c'))
glob(os.path.join('src','*','*','*','*.c'))
But this is obviously limited and clunky.
pathlib.Path.rglob
Use pathlib.Path.rglob from the pathlib module, which was introduced in Python 3.5.
from pathlib import Path
for path in Path('src').rglob('*.c'):
print(path.name)
If you don't want to use pathlib, use can use glob.glob('**/*.c'), but don't forget to pass in the recursive keyword parameter and it will use inordinate amount of time on large directories.
For cases where matching files beginning with a dot (.); like files in the current directory or hidden files on Unix based system, use the os.walk solution below.
os.walk
For older Python versions, use os.walk to recursively walk a directory and fnmatch.filter to match against a simple expression:
import fnmatch
import os
matches = []
for root, dirnames, filenames in os.walk('src'):
for filename in fnmatch.filter(filenames, '*.c'):
matches.append(os.path.join(root, filename))
For python >= 3.5 you can use **, recursive=True :
import glob
for f in glob.glob('/path/**/*.c', recursive=True):
print(f)
If recursive is True (default is False), the pattern ** will match any files and zero
or more directories and subdirectories. If the pattern is followed by
an os.sep, only directories and subdirectories match.
Python 3 Demo
Similar to other solutions, but using fnmatch.fnmatch instead of glob, since os.walk already listed the filenames:
import os, fnmatch
def find_files(directory, pattern):
for root, dirs, files in os.walk(directory):
for basename in files:
if fnmatch.fnmatch(basename, pattern):
filename = os.path.join(root, basename)
yield filename
for filename in find_files('src', '*.c'):
print 'Found C source:', filename
Also, using a generator alows you to process each file as it is found, instead of finding all the files and then processing them.
I've modified the glob module to support ** for recursive globbing, e.g:
>>> import glob2
>>> all_header_files = glob2.glob('src/**/*.c')
https://github.com/miracle2k/python-glob2/
Useful when you want to provide your users with the ability to use the ** syntax, and thus os.walk() alone is not good enough.
Starting with Python 3.4, one can use the glob() method of one of the Path classes in the new pathlib module, which supports ** wildcards. For example:
from pathlib import Path
for file_path in Path('src').glob('**/*.c'):
print(file_path) # do whatever you need with these files
Update:
Starting with Python 3.5, the same syntax is also supported by glob.glob().
import os
import fnmatch
def recursive_glob(treeroot, pattern):
results = []
for base, dirs, files in os.walk(treeroot):
goodfiles = fnmatch.filter(files, pattern)
results.extend(os.path.join(base, f) for f in goodfiles)
return results
fnmatch gives you exactly the same patterns as glob, so this is really an excellent replacement for glob.glob with very close semantics. An iterative version (e.g. a generator), IOW a replacement for glob.iglob, is a trivial adaptation (just yield the intermediate results as you go, instead of extending a single results list to return at the end).
You'll want to use os.walk to collect filenames that match your criteria. For example:
import os
cfiles = []
for root, dirs, files in os.walk('src'):
for file in files:
if file.endswith('.c'):
cfiles.append(os.path.join(root, file))
Here's a solution with nested list comprehensions, os.walk and simple suffix matching instead of glob:
import os
cfiles = [os.path.join(root, filename)
for root, dirnames, filenames in os.walk('src')
for filename in filenames if filename.endswith('.c')]
It can be compressed to a one-liner:
import os;cfiles=[os.path.join(r,f) for r,d,fs in os.walk('src') for f in fs if f.endswith('.c')]
or generalized as a function:
import os
def recursive_glob(rootdir='.', suffix=''):
return [os.path.join(looproot, filename)
for looproot, _, filenames in os.walk(rootdir)
for filename in filenames if filename.endswith(suffix)]
cfiles = recursive_glob('src', '.c')
If you do need full glob style patterns, you can follow Alex's and
Bruno's example and use fnmatch:
import fnmatch
import os
def recursive_glob(rootdir='.', pattern='*'):
return [os.path.join(looproot, filename)
for looproot, _, filenames in os.walk(rootdir)
for filename in filenames
if fnmatch.fnmatch(filename, pattern)]
cfiles = recursive_glob('src', '*.c')
Consider pathlib.rglob().
This is like calling Path.glob() with "**/" added in front of the given relative pattern:
import pathlib
for p in pathlib.Path("src").rglob("*.c"):
print(p)
See also #taleinat's related post here and a similar post elsewhere.
import os, glob
for each in glob.glob('path/**/*.c', recursive=True):
print(f'Name with path: {each} \nName without path: {os.path.basename(each)}')
glob.glob('*.c') :matches all files ending in .c in current directory
glob.glob('*/*.c') :same as 1
glob.glob('**/*.c') :matches all files ending in .c in the immediate subdirectories only, but not in the current directory
glob.glob('*.c',recursive=True) :same as 1
glob.glob('*/*.c',recursive=True) :same as 3
glob.glob('**/*.c',recursive=True) :matches all files ending in .c in the current directory and in all subdirectories
In case this may interest anyone, I've profiled the top three proposed methods.
I have about ~500K files in the globbed folder (in total), and 2K files that match the desired pattern.
here's the (very basic) code
import glob
import json
import fnmatch
import os
from pathlib import Path
from time import time
def find_files_iglob():
return glob.iglob("./data/**/data.json", recursive=True)
def find_files_oswalk():
for root, dirnames, filenames in os.walk('data'):
for filename in fnmatch.filter(filenames, 'data.json'):
yield os.path.join(root, filename)
def find_files_rglob():
return Path('data').rglob('data.json')
t0 = time()
for f in find_files_oswalk(): pass
t1 = time()
for f in find_files_rglob(): pass
t2 = time()
for f in find_files_iglob(): pass
t3 = time()
print(t1-t0, t2-t1, t3-t2)
And the results I got were:
os_walk: ~3.6sec
rglob ~14.5sec
iglob: ~16.9sec
The platform: Ubuntu 16.04, x86_64 (core i7),
Recently I had to recover my pictures with the extension .jpg. I ran photorec and recovered 4579 directories 2.2 million files within, having tremendous variety of extensions.With the script below I was able to select 50133 files havin .jpg extension within minutes:
#!/usr/binenv python2.7
import glob
import shutil
import os
src_dir = "/home/mustafa/Masaüstü/yedek"
dst_dir = "/home/mustafa/Genel/media"
for mediafile in glob.iglob(os.path.join(src_dir, "*", "*.jpg")): #"*" is for subdirectory
shutil.copy(mediafile, dst_dir)
based on other answers this is my current working implementation, which retrieves nested xml files in a root directory:
files = []
for root, dirnames, filenames in os.walk(myDir):
files.extend(glob.glob(root + "/*.xml"))
I'm really having fun with python :)
For python 3.5 and later
import glob
#file_names_array = glob.glob('path/*.c', recursive=True)
#above works for files directly at path/ as guided by NeStack
#updated version
file_names_array = glob.glob('path/**/*.c', recursive=True)
further you might need
for full_path_in_src in file_names_array:
print (full_path_in_src ) # be like 'abc/xyz.c'
#Full system path of this would be like => 'path till src/abc/xyz.c'
Johan and Bruno provide excellent solutions on the minimal requirement as stated. I have just released Formic which implements Ant FileSet and Globs which can handle this and more complicated scenarios. An implementation of your requirement is:
import formic
fileset = formic.FileSet(include="/src/**/*.c")
for file_name in fileset.qualified_files():
print file_name
Another way to do it using just the glob module. Just seed the rglob method with a starting base directory and a pattern to match and it will return a list of matching file names.
import glob
import os
def _getDirs(base):
return [x for x in glob.iglob(os.path.join( base, '*')) if os.path.isdir(x) ]
def rglob(base, pattern):
list = []
list.extend(glob.glob(os.path.join(base,pattern)))
dirs = _getDirs(base)
if len(dirs):
for d in dirs:
list.extend(rglob(os.path.join(base,d), pattern))
return list
Or with a list comprehension:
>>> base = r"c:\User\xtofl"
>>> binfiles = [ os.path.join(base,f)
for base, _, files in os.walk(root)
for f in files if f.endswith(".jpg") ]
If the files are on a remote file system or inside an archive, you can use an implementation of the fsspec AbstractFileSystem class. For example, to list all the files in a zipfile:
from fsspec.implementations.zip import ZipFileSystem
fs = ZipFileSystem("/tmp/test.zip")
fs.glob("/**") # equivalent: fs.find("/")
or to list all the files in a publicly available S3 bucket:
from s3fs import S3FileSystem
fs_s3 = S3FileSystem(anon=True)
fs_s3.glob("noaa-goes16/ABI-L1b-RadF/2020/045/**") # or use fs_s3.find
you can also use it for a local filesystem, which may be interesting if your implementation should be filesystem-agnostic:
from fsspec.implementations.local import LocalFileSystem
fs = LocalFileSystem()
fs.glob("/tmp/test/**")
Other implementations include Google Cloud, Github, SFTP/SSH, Dropbox, and Azure. For details, see the fsspec API documentation.
Just made this.. it will print files and directory in hierarchical way
But I didn't used fnmatch or walk
#!/usr/bin/python
import os,glob,sys
def dirlist(path, c = 1):
for i in glob.glob(os.path.join(path, "*")):
if os.path.isfile(i):
filepath, filename = os.path.split(i)
print '----' *c + filename
elif os.path.isdir(i):
dirname = os.path.basename(i)
print '----' *c + dirname
c+=1
dirlist(i,c)
c-=1
path = os.path.normpath(sys.argv[1])
print(os.path.basename(path))
dirlist(path)
That one uses fnmatch or regular expression:
import fnmatch, os
def filepaths(directory, pattern):
for root, dirs, files in os.walk(directory):
for basename in files:
try:
matched = pattern.match(basename)
except AttributeError:
matched = fnmatch.fnmatch(basename, pattern)
if matched:
yield os.path.join(root, basename)
# usage
if __name__ == '__main__':
from pprint import pprint as pp
import re
path = r'/Users/hipertracker/app/myapp'
pp([x for x in filepaths(path, re.compile(r'.*\.py$'))])
pp([x for x in filepaths(path, '*.py')])
In addition to the suggested answers, you can do this with some lazy generation and list comprehension magic:
import os, glob, itertools
results = itertools.chain.from_iterable(glob.iglob(os.path.join(root,'*.c'))
for root, dirs, files in os.walk('src'))
for f in results: print(f)
Besides fitting in one line and avoiding unnecessary lists in memory, this also has the nice side effect, that you can use it in a way similar to the ** operator, e.g., you could use os.path.join(root, 'some/path/*.c') in order to get all .c files in all sub directories of src that have this structure.
This is a working code on Python 2.7. As part of my devops work, I was required to write a script which would move the config files marked with live-appName.properties to appName.properties. There could be other extension files as well like live-appName.xml.
Below is a working code for this, which finds the files in the given directories (nested level) and then renames (moves) it to the required filename
def flipProperties(searchDir):
print "Flipping properties to point to live DB"
for root, dirnames, filenames in os.walk(searchDir):
for filename in fnmatch.filter(filenames, 'live-*.*'):
targetFileName = os.path.join(root, filename.split("live-")[1])
print "File "+ os.path.join(root, filename) + "will be moved to " + targetFileName
shutil.move(os.path.join(root, filename), targetFileName)
This function is called from a main script
flipProperties(searchDir)
Hope this helps someone struggling with similar issues.
Simplified version of Johan Dahlin's answer, without fnmatch.
import os
matches = []
for root, dirnames, filenames in os.walk('src'):
matches += [os.path.join(root, f) for f in filenames if f[-2:] == '.c']
Here is my solution using list comprehension to search for multiple file extensions recursively in a directory and all subdirectories:
import os, glob
def _globrec(path, *exts):
""" Glob recursively a directory and all subdirectories for multiple file extensions
Note: Glob is case-insensitive, i. e. for '\*.jpg' you will get files ending
with .jpg and .JPG
Parameters
----------
path : str
A directory name
exts : tuple
File extensions to glob for
Returns
-------
files : list
list of files matching extensions in exts in path and subfolders
"""
dirs = [a[0] for a in os.walk(path)]
f_filter = [d+e for d in dirs for e in exts]
return [f for files in [glob.iglob(files) for files in f_filter] for f in files]
my_pictures = _globrec(r'C:\Temp', '\*.jpg','\*.bmp','\*.png','\*.gif')
for f in my_pictures:
print f
import sys, os, glob
dir_list = ["c:\\books\\heap"]
while len(dir_list) > 0:
cur_dir = dir_list[0]
del dir_list[0]
list_of_files = glob.glob(cur_dir+'\\*')
for book in list_of_files:
if os.path.isfile(book):
print(book)
else:
dir_list.append(book)
I modified the top answer in this posting.. and recently created this script which will loop through all files in a given directory (searchdir) and the sub-directories under it... and prints filename, rootdir, modified/creation date, and size.
Hope this helps someone... and they can walk the directory and get fileinfo.
import time
import fnmatch
import os
def fileinfo(file):
filename = os.path.basename(file)
rootdir = os.path.dirname(file)
lastmod = time.ctime(os.path.getmtime(file))
creation = time.ctime(os.path.getctime(file))
filesize = os.path.getsize(file)
print "%s**\t%s\t%s\t%s\t%s" % (rootdir, filename, lastmod, creation, filesize)
searchdir = r'D:\Your\Directory\Root'
matches = []
for root, dirnames, filenames in os.walk(searchdir):
## for filename in fnmatch.filter(filenames, '*.c'):
for filename in filenames:
## matches.append(os.path.join(root, filename))
##print matches
fileinfo(os.path.join(root, filename))
Here is a solution that will match the pattern against the full path and not just the base filename.
It uses fnmatch.translate to convert a glob-style pattern into a regular expression, which is then matched against the full path of each file found while walking the directory.
re.IGNORECASE is optional, but desirable on Windows since the file system itself is not case-sensitive. (I didn't bother compiling the regex because docs indicate it should be cached internally.)
import fnmatch
import os
import re
def findfiles(dir, pattern):
patternregex = fnmatch.translate(pattern)
for root, dirs, files in os.walk(dir):
for basename in files:
filename = os.path.join(root, basename)
if re.search(patternregex, filename, re.IGNORECASE):
yield filename
I needed a solution for python 2.x that works fast on large directories.
I endet up with this:
import subprocess
foundfiles= subprocess.check_output("ls src/*.c src/**/*.c", shell=True)
for foundfile in foundfiles.splitlines():
print foundfile
Note that you might need some exception handling in case ls doesn't find any matching file.

How can I check if there are any more JSON files remaining in a directory in Python? [duplicate]

This is what I have:
glob(os.path.join('src','*.c'))
but I want to search the subfolders of src. Something like this would work:
glob(os.path.join('src','*.c'))
glob(os.path.join('src','*','*.c'))
glob(os.path.join('src','*','*','*.c'))
glob(os.path.join('src','*','*','*','*.c'))
But this is obviously limited and clunky.
pathlib.Path.rglob
Use pathlib.Path.rglob from the pathlib module, which was introduced in Python 3.5.
from pathlib import Path
for path in Path('src').rglob('*.c'):
print(path.name)
If you don't want to use pathlib, use can use glob.glob('**/*.c'), but don't forget to pass in the recursive keyword parameter and it will use inordinate amount of time on large directories.
For cases where matching files beginning with a dot (.); like files in the current directory or hidden files on Unix based system, use the os.walk solution below.
os.walk
For older Python versions, use os.walk to recursively walk a directory and fnmatch.filter to match against a simple expression:
import fnmatch
import os
matches = []
for root, dirnames, filenames in os.walk('src'):
for filename in fnmatch.filter(filenames, '*.c'):
matches.append(os.path.join(root, filename))
For python >= 3.5 you can use **, recursive=True :
import glob
for f in glob.glob('/path/**/*.c', recursive=True):
print(f)
If recursive is True (default is False), the pattern ** will match any files and zero
or more directories and subdirectories. If the pattern is followed by
an os.sep, only directories and subdirectories match.
Python 3 Demo
Similar to other solutions, but using fnmatch.fnmatch instead of glob, since os.walk already listed the filenames:
import os, fnmatch
def find_files(directory, pattern):
for root, dirs, files in os.walk(directory):
for basename in files:
if fnmatch.fnmatch(basename, pattern):
filename = os.path.join(root, basename)
yield filename
for filename in find_files('src', '*.c'):
print 'Found C source:', filename
Also, using a generator alows you to process each file as it is found, instead of finding all the files and then processing them.
I've modified the glob module to support ** for recursive globbing, e.g:
>>> import glob2
>>> all_header_files = glob2.glob('src/**/*.c')
https://github.com/miracle2k/python-glob2/
Useful when you want to provide your users with the ability to use the ** syntax, and thus os.walk() alone is not good enough.
Starting with Python 3.4, one can use the glob() method of one of the Path classes in the new pathlib module, which supports ** wildcards. For example:
from pathlib import Path
for file_path in Path('src').glob('**/*.c'):
print(file_path) # do whatever you need with these files
Update:
Starting with Python 3.5, the same syntax is also supported by glob.glob().
import os
import fnmatch
def recursive_glob(treeroot, pattern):
results = []
for base, dirs, files in os.walk(treeroot):
goodfiles = fnmatch.filter(files, pattern)
results.extend(os.path.join(base, f) for f in goodfiles)
return results
fnmatch gives you exactly the same patterns as glob, so this is really an excellent replacement for glob.glob with very close semantics. An iterative version (e.g. a generator), IOW a replacement for glob.iglob, is a trivial adaptation (just yield the intermediate results as you go, instead of extending a single results list to return at the end).
You'll want to use os.walk to collect filenames that match your criteria. For example:
import os
cfiles = []
for root, dirs, files in os.walk('src'):
for file in files:
if file.endswith('.c'):
cfiles.append(os.path.join(root, file))
Here's a solution with nested list comprehensions, os.walk and simple suffix matching instead of glob:
import os
cfiles = [os.path.join(root, filename)
for root, dirnames, filenames in os.walk('src')
for filename in filenames if filename.endswith('.c')]
It can be compressed to a one-liner:
import os;cfiles=[os.path.join(r,f) for r,d,fs in os.walk('src') for f in fs if f.endswith('.c')]
or generalized as a function:
import os
def recursive_glob(rootdir='.', suffix=''):
return [os.path.join(looproot, filename)
for looproot, _, filenames in os.walk(rootdir)
for filename in filenames if filename.endswith(suffix)]
cfiles = recursive_glob('src', '.c')
If you do need full glob style patterns, you can follow Alex's and
Bruno's example and use fnmatch:
import fnmatch
import os
def recursive_glob(rootdir='.', pattern='*'):
return [os.path.join(looproot, filename)
for looproot, _, filenames in os.walk(rootdir)
for filename in filenames
if fnmatch.fnmatch(filename, pattern)]
cfiles = recursive_glob('src', '*.c')
Consider pathlib.rglob().
This is like calling Path.glob() with "**/" added in front of the given relative pattern:
import pathlib
for p in pathlib.Path("src").rglob("*.c"):
print(p)
See also #taleinat's related post here and a similar post elsewhere.
import os, glob
for each in glob.glob('path/**/*.c', recursive=True):
print(f'Name with path: {each} \nName without path: {os.path.basename(each)}')
glob.glob('*.c') :matches all files ending in .c in current directory
glob.glob('*/*.c') :same as 1
glob.glob('**/*.c') :matches all files ending in .c in the immediate subdirectories only, but not in the current directory
glob.glob('*.c',recursive=True) :same as 1
glob.glob('*/*.c',recursive=True) :same as 3
glob.glob('**/*.c',recursive=True) :matches all files ending in .c in the current directory and in all subdirectories
In case this may interest anyone, I've profiled the top three proposed methods.
I have about ~500K files in the globbed folder (in total), and 2K files that match the desired pattern.
here's the (very basic) code
import glob
import json
import fnmatch
import os
from pathlib import Path
from time import time
def find_files_iglob():
return glob.iglob("./data/**/data.json", recursive=True)
def find_files_oswalk():
for root, dirnames, filenames in os.walk('data'):
for filename in fnmatch.filter(filenames, 'data.json'):
yield os.path.join(root, filename)
def find_files_rglob():
return Path('data').rglob('data.json')
t0 = time()
for f in find_files_oswalk(): pass
t1 = time()
for f in find_files_rglob(): pass
t2 = time()
for f in find_files_iglob(): pass
t3 = time()
print(t1-t0, t2-t1, t3-t2)
And the results I got were:
os_walk: ~3.6sec
rglob ~14.5sec
iglob: ~16.9sec
The platform: Ubuntu 16.04, x86_64 (core i7),
Recently I had to recover my pictures with the extension .jpg. I ran photorec and recovered 4579 directories 2.2 million files within, having tremendous variety of extensions.With the script below I was able to select 50133 files havin .jpg extension within minutes:
#!/usr/binenv python2.7
import glob
import shutil
import os
src_dir = "/home/mustafa/Masaüstü/yedek"
dst_dir = "/home/mustafa/Genel/media"
for mediafile in glob.iglob(os.path.join(src_dir, "*", "*.jpg")): #"*" is for subdirectory
shutil.copy(mediafile, dst_dir)
based on other answers this is my current working implementation, which retrieves nested xml files in a root directory:
files = []
for root, dirnames, filenames in os.walk(myDir):
files.extend(glob.glob(root + "/*.xml"))
I'm really having fun with python :)
For python 3.5 and later
import glob
#file_names_array = glob.glob('path/*.c', recursive=True)
#above works for files directly at path/ as guided by NeStack
#updated version
file_names_array = glob.glob('path/**/*.c', recursive=True)
further you might need
for full_path_in_src in file_names_array:
print (full_path_in_src ) # be like 'abc/xyz.c'
#Full system path of this would be like => 'path till src/abc/xyz.c'
Johan and Bruno provide excellent solutions on the minimal requirement as stated. I have just released Formic which implements Ant FileSet and Globs which can handle this and more complicated scenarios. An implementation of your requirement is:
import formic
fileset = formic.FileSet(include="/src/**/*.c")
for file_name in fileset.qualified_files():
print file_name
Another way to do it using just the glob module. Just seed the rglob method with a starting base directory and a pattern to match and it will return a list of matching file names.
import glob
import os
def _getDirs(base):
return [x for x in glob.iglob(os.path.join( base, '*')) if os.path.isdir(x) ]
def rglob(base, pattern):
list = []
list.extend(glob.glob(os.path.join(base,pattern)))
dirs = _getDirs(base)
if len(dirs):
for d in dirs:
list.extend(rglob(os.path.join(base,d), pattern))
return list
Or with a list comprehension:
>>> base = r"c:\User\xtofl"
>>> binfiles = [ os.path.join(base,f)
for base, _, files in os.walk(root)
for f in files if f.endswith(".jpg") ]
If the files are on a remote file system or inside an archive, you can use an implementation of the fsspec AbstractFileSystem class. For example, to list all the files in a zipfile:
from fsspec.implementations.zip import ZipFileSystem
fs = ZipFileSystem("/tmp/test.zip")
fs.glob("/**") # equivalent: fs.find("/")
or to list all the files in a publicly available S3 bucket:
from s3fs import S3FileSystem
fs_s3 = S3FileSystem(anon=True)
fs_s3.glob("noaa-goes16/ABI-L1b-RadF/2020/045/**") # or use fs_s3.find
you can also use it for a local filesystem, which may be interesting if your implementation should be filesystem-agnostic:
from fsspec.implementations.local import LocalFileSystem
fs = LocalFileSystem()
fs.glob("/tmp/test/**")
Other implementations include Google Cloud, Github, SFTP/SSH, Dropbox, and Azure. For details, see the fsspec API documentation.
Just made this.. it will print files and directory in hierarchical way
But I didn't used fnmatch or walk
#!/usr/bin/python
import os,glob,sys
def dirlist(path, c = 1):
for i in glob.glob(os.path.join(path, "*")):
if os.path.isfile(i):
filepath, filename = os.path.split(i)
print '----' *c + filename
elif os.path.isdir(i):
dirname = os.path.basename(i)
print '----' *c + dirname
c+=1
dirlist(i,c)
c-=1
path = os.path.normpath(sys.argv[1])
print(os.path.basename(path))
dirlist(path)
That one uses fnmatch or regular expression:
import fnmatch, os
def filepaths(directory, pattern):
for root, dirs, files in os.walk(directory):
for basename in files:
try:
matched = pattern.match(basename)
except AttributeError:
matched = fnmatch.fnmatch(basename, pattern)
if matched:
yield os.path.join(root, basename)
# usage
if __name__ == '__main__':
from pprint import pprint as pp
import re
path = r'/Users/hipertracker/app/myapp'
pp([x for x in filepaths(path, re.compile(r'.*\.py$'))])
pp([x for x in filepaths(path, '*.py')])
In addition to the suggested answers, you can do this with some lazy generation and list comprehension magic:
import os, glob, itertools
results = itertools.chain.from_iterable(glob.iglob(os.path.join(root,'*.c'))
for root, dirs, files in os.walk('src'))
for f in results: print(f)
Besides fitting in one line and avoiding unnecessary lists in memory, this also has the nice side effect, that you can use it in a way similar to the ** operator, e.g., you could use os.path.join(root, 'some/path/*.c') in order to get all .c files in all sub directories of src that have this structure.
This is a working code on Python 2.7. As part of my devops work, I was required to write a script which would move the config files marked with live-appName.properties to appName.properties. There could be other extension files as well like live-appName.xml.
Below is a working code for this, which finds the files in the given directories (nested level) and then renames (moves) it to the required filename
def flipProperties(searchDir):
print "Flipping properties to point to live DB"
for root, dirnames, filenames in os.walk(searchDir):
for filename in fnmatch.filter(filenames, 'live-*.*'):
targetFileName = os.path.join(root, filename.split("live-")[1])
print "File "+ os.path.join(root, filename) + "will be moved to " + targetFileName
shutil.move(os.path.join(root, filename), targetFileName)
This function is called from a main script
flipProperties(searchDir)
Hope this helps someone struggling with similar issues.
Simplified version of Johan Dahlin's answer, without fnmatch.
import os
matches = []
for root, dirnames, filenames in os.walk('src'):
matches += [os.path.join(root, f) for f in filenames if f[-2:] == '.c']
Here is my solution using list comprehension to search for multiple file extensions recursively in a directory and all subdirectories:
import os, glob
def _globrec(path, *exts):
""" Glob recursively a directory and all subdirectories for multiple file extensions
Note: Glob is case-insensitive, i. e. for '\*.jpg' you will get files ending
with .jpg and .JPG
Parameters
----------
path : str
A directory name
exts : tuple
File extensions to glob for
Returns
-------
files : list
list of files matching extensions in exts in path and subfolders
"""
dirs = [a[0] for a in os.walk(path)]
f_filter = [d+e for d in dirs for e in exts]
return [f for files in [glob.iglob(files) for files in f_filter] for f in files]
my_pictures = _globrec(r'C:\Temp', '\*.jpg','\*.bmp','\*.png','\*.gif')
for f in my_pictures:
print f
import sys, os, glob
dir_list = ["c:\\books\\heap"]
while len(dir_list) > 0:
cur_dir = dir_list[0]
del dir_list[0]
list_of_files = glob.glob(cur_dir+'\\*')
for book in list_of_files:
if os.path.isfile(book):
print(book)
else:
dir_list.append(book)
I modified the top answer in this posting.. and recently created this script which will loop through all files in a given directory (searchdir) and the sub-directories under it... and prints filename, rootdir, modified/creation date, and size.
Hope this helps someone... and they can walk the directory and get fileinfo.
import time
import fnmatch
import os
def fileinfo(file):
filename = os.path.basename(file)
rootdir = os.path.dirname(file)
lastmod = time.ctime(os.path.getmtime(file))
creation = time.ctime(os.path.getctime(file))
filesize = os.path.getsize(file)
print "%s**\t%s\t%s\t%s\t%s" % (rootdir, filename, lastmod, creation, filesize)
searchdir = r'D:\Your\Directory\Root'
matches = []
for root, dirnames, filenames in os.walk(searchdir):
## for filename in fnmatch.filter(filenames, '*.c'):
for filename in filenames:
## matches.append(os.path.join(root, filename))
##print matches
fileinfo(os.path.join(root, filename))
Here is a solution that will match the pattern against the full path and not just the base filename.
It uses fnmatch.translate to convert a glob-style pattern into a regular expression, which is then matched against the full path of each file found while walking the directory.
re.IGNORECASE is optional, but desirable on Windows since the file system itself is not case-sensitive. (I didn't bother compiling the regex because docs indicate it should be cached internally.)
import fnmatch
import os
import re
def findfiles(dir, pattern):
patternregex = fnmatch.translate(pattern)
for root, dirs, files in os.walk(dir):
for basename in files:
filename = os.path.join(root, basename)
if re.search(patternregex, filename, re.IGNORECASE):
yield filename
I needed a solution for python 2.x that works fast on large directories.
I endet up with this:
import subprocess
foundfiles= subprocess.check_output("ls src/*.c src/**/*.c", shell=True)
for foundfile in foundfiles.splitlines():
print foundfile
Note that you might need some exception handling in case ls doesn't find any matching file.

Python folder names in the directory

how can i get the folder names existing in a directory using Python ?
I want to save all the subfolders into a list to work with the names after that but i dont know how to read the subfolder names ?
Thanks for you help
You can use os.walk()
# !/usr/bin/python
import os
directory_list = list()
for root, dirs, files in os.walk("/path/to/your/dir", topdown=False):
for name in dirs:
directory_list.append(os.path.join(root, name))
print directory_list
EDIT
If you only want the first level and not actually "walk" through the subdirectories, it is even less code:
import os
root, dirs, files = os.walk("/path/to/your/dir").next()
print dirs
This is not really what os.walk is made for. If you really only want one level of subdirectories, you can also use os.listdir() like Yannik Ammann suggested:
root='/path/to/my/dir'
dirlist = [ item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item)) ]
print dirlist
Starting with Python 3.4, you can also use the new pathlib module:
from pathlib import Path
p = Path('some/folder')
subdirectories = [x for x in p.iterdir() if x.is_dir()]
print(subdirectories)
You can use os.listdir() here a link to the docs
Warning returns files and directories
example:
import os
path = 'pyth/to/dir/'
dir_list = os.listdir(path)
update: you need to check if the returned names are directories or files
import os
path = 'pyth/to/dir/'
# list of all content in a directory, filtered so only directories are returned
dir_list = [directory for directory in os.listdir(path) if os.path.isdir(path+directory)]
You should import os first.
import os
files=[]
files = [f for f in sorted(os.listdir(FileDirectoryPath))]
This would give you list with all files in the FileDirectoryPath sorted.
I use os.listdir
Get all folder names of a directory
folder_names = []
for entry_name in os.listdir(MYDIR):
entry_path = os.path.join(MYDIR, entry_name)
if os.path.isdir(entry_path):
folder_names.append(entry_name)
Get all folder paths of a directory
folder_paths = []
for entry_name in os.listdir(MYDIR):
entry_path = os.path.join(MYDIR, entry_name)
if os.path.isdir(entry_path):
folder_paths.append(entry_path)
Get all file names of a directory
file_names = []
for file_name in os.listdir(MYDIR):
file_path = os.path.join(MYDIR, file_name)
if os.path.isfile(file_path):
file_names.append(file_name)
Get all file paths of a directory
file_paths = []
for file_name in os.listdir(MYDIR):
file_path = os.path.join(MYDIR, file_name)
if os.path.isfile(file_path):
file_paths.append(file_path)
For python 3 I'm using this script
import os
root='./'
dirlist = [ item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item)) ]
for dir in dirlist:
print(dir)
Use os.walk(path)
import os
path = 'C:\\'
for root, directories, files in os.walk(path):
for directory in directories:
print os.path.join(root, directory)
Python 3.x: If you want only the directories in a given directory, try:
import os
search_path = '.' # set your path here.
root, dirs, files = next(os.walk(search_path), ([],[],[]))
print(dirs)
The above example will print out a list of the directories in the current directory like this:
['dir1', 'dir2', 'dir3']
The output contains only the sub-directory names.
If the directory does not have sub-directories, it will print:
[]
os.walk() is a generator method, so use next() to only call it once. The 3-tuple of empty strings is for the error condition when the directory does not contain any sub-directories because the os.walk() generator returns 3-tuples for each layer in the directory tree. Without those, if the directory is empty, next() will raise a StopIteration exception.
For a more compact version:
dirs = next(os.walk(search_path), ([],[],[]))[1]

All Files in Dir & Sub-Dir

I would like to find all the files in a directory and all sub-directories.
code used:
import os
import sys
path = "C:\\"
dirs = os.listdir(path)
filename = "C.txt"
FILE = open(filename, "w")
FILE.write(str(dirs))
FILE.close()
print dirs
The problem is - this code only lists files in directories, not sub-directories. What do I need to change in order to also list files in subdirectories?
To traverse a directory tree you want to use os.walk() for this.
Here's an example to get you started:
import os
searchdir = r'C:\root_dir' # traversal starts in this directory (the root)
for root, dirs, files in os.walk(searchdir):
for name in files:
(base, ext) = os.path.splitext(name) # split base and extension
print base, ext
which would give you access to the file names and the components.
You'll find the functions in the os and os.path module to be of great use for this sort of work.
This function will help you: os.path.walk() http://docs.python.org/library/os.path.html#os.path.walk

How to use glob() to find files recursively?

This is what I have:
glob(os.path.join('src','*.c'))
but I want to search the subfolders of src. Something like this would work:
glob(os.path.join('src','*.c'))
glob(os.path.join('src','*','*.c'))
glob(os.path.join('src','*','*','*.c'))
glob(os.path.join('src','*','*','*','*.c'))
But this is obviously limited and clunky.
pathlib.Path.rglob
Use pathlib.Path.rglob from the pathlib module, which was introduced in Python 3.5.
from pathlib import Path
for path in Path('src').rglob('*.c'):
print(path.name)
If you don't want to use pathlib, use can use glob.glob('**/*.c'), but don't forget to pass in the recursive keyword parameter and it will use inordinate amount of time on large directories.
For cases where matching files beginning with a dot (.); like files in the current directory or hidden files on Unix based system, use the os.walk solution below.
os.walk
For older Python versions, use os.walk to recursively walk a directory and fnmatch.filter to match against a simple expression:
import fnmatch
import os
matches = []
for root, dirnames, filenames in os.walk('src'):
for filename in fnmatch.filter(filenames, '*.c'):
matches.append(os.path.join(root, filename))
For python >= 3.5 you can use **, recursive=True, i.e.:
import glob
for f in glob.glob('/path/**/*.c', recursive=True):
print(f)
If recursive is True (default is False), the pattern ** will match any files and zero
or more directories and subdirectories. If the pattern is followed by
an os.sep, only directories and subdirectories match.
Python 3 Demo
Similar to other solutions, but using fnmatch.fnmatch instead of glob, since os.walk already listed the filenames:
import os, fnmatch
def find_files(directory, pattern):
for root, dirs, files in os.walk(directory):
for basename in files:
if fnmatch.fnmatch(basename, pattern):
filename = os.path.join(root, basename)
yield filename
for filename in find_files('src', '*.c'):
print 'Found C source:', filename
Also, using a generator alows you to process each file as it is found, instead of finding all the files and then processing them.
I've modified the glob module to support ** for recursive globbing, e.g:
>>> import glob2
>>> all_header_files = glob2.glob('src/**/*.c')
https://github.com/miracle2k/python-glob2/
Useful when you want to provide your users with the ability to use the ** syntax, and thus os.walk() alone is not good enough.
Starting with Python 3.4, one can use the glob() method of one of the Path classes in the new pathlib module, which supports ** wildcards. For example:
from pathlib import Path
for file_path in Path('src').glob('**/*.c'):
print(file_path) # do whatever you need with these files
Update:
Starting with Python 3.5, the same syntax is also supported by glob.glob().
import os
import fnmatch
def recursive_glob(treeroot, pattern):
results = []
for base, dirs, files in os.walk(treeroot):
goodfiles = fnmatch.filter(files, pattern)
results.extend(os.path.join(base, f) for f in goodfiles)
return results
fnmatch gives you exactly the same patterns as glob, so this is really an excellent replacement for glob.glob with very close semantics. An iterative version (e.g. a generator), IOW a replacement for glob.iglob, is a trivial adaptation (just yield the intermediate results as you go, instead of extending a single results list to return at the end).
You'll want to use os.walk to collect filenames that match your criteria. For example:
import os
cfiles = []
for root, dirs, files in os.walk('src'):
for file in files:
if file.endswith('.c'):
cfiles.append(os.path.join(root, file))
Here's a solution with nested list comprehensions, os.walk and simple suffix matching instead of glob:
import os
cfiles = [os.path.join(root, filename)
for root, dirnames, filenames in os.walk('src')
for filename in filenames if filename.endswith('.c')]
It can be compressed to a one-liner:
import os;cfiles=[os.path.join(r,f) for r,d,fs in os.walk('src') for f in fs if f.endswith('.c')]
or generalized as a function:
import os
def recursive_glob(rootdir='.', suffix=''):
return [os.path.join(looproot, filename)
for looproot, _, filenames in os.walk(rootdir)
for filename in filenames if filename.endswith(suffix)]
cfiles = recursive_glob('src', '.c')
If you do need full glob style patterns, you can follow Alex's and
Bruno's example and use fnmatch:
import fnmatch
import os
def recursive_glob(rootdir='.', pattern='*'):
return [os.path.join(looproot, filename)
for looproot, _, filenames in os.walk(rootdir)
for filename in filenames
if fnmatch.fnmatch(filename, pattern)]
cfiles = recursive_glob('src', '*.c')
Consider pathlib.rglob().
This is like calling Path.glob() with "**/" added in front of the given relative pattern:
import pathlib
for p in pathlib.Path("src").rglob("*.c"):
print(p)
See also #taleinat's related post here and a similar post elsewhere.
import os, glob
for each in glob.glob('path/**/*.c', recursive=True):
print(f'Name with path: {each} \nName without path: {os.path.basename(each)}')
glob.glob('*.c') :matches all files ending in .c in current directory
glob.glob('*/*.c') :same as 1
glob.glob('**/*.c') :matches all files ending in .c in the immediate subdirectories only, but not in the current directory
glob.glob('*.c',recursive=True) :same as 1
glob.glob('*/*.c',recursive=True) :same as 3
glob.glob('**/*.c',recursive=True) :matches all files ending in .c in the current directory and in all subdirectories
In case this may interest anyone, I've profiled the top three proposed methods.
I have about ~500K files in the globbed folder (in total), and 2K files that match the desired pattern.
here's the (very basic) code
import glob
import json
import fnmatch
import os
from pathlib import Path
from time import time
def find_files_iglob():
return glob.iglob("./data/**/data.json", recursive=True)
def find_files_oswalk():
for root, dirnames, filenames in os.walk('data'):
for filename in fnmatch.filter(filenames, 'data.json'):
yield os.path.join(root, filename)
def find_files_rglob():
return Path('data').rglob('data.json')
t0 = time()
for f in find_files_oswalk(): pass
t1 = time()
for f in find_files_rglob(): pass
t2 = time()
for f in find_files_iglob(): pass
t3 = time()
print(t1-t0, t2-t1, t3-t2)
And the results I got were:
os_walk: ~3.6sec
rglob ~14.5sec
iglob: ~16.9sec
The platform: Ubuntu 16.04, x86_64 (core i7),
Recently I had to recover my pictures with the extension .jpg. I ran photorec and recovered 4579 directories 2.2 million files within, having tremendous variety of extensions.With the script below I was able to select 50133 files havin .jpg extension within minutes:
#!/usr/binenv python2.7
import glob
import shutil
import os
src_dir = "/home/mustafa/Masaüstü/yedek"
dst_dir = "/home/mustafa/Genel/media"
for mediafile in glob.iglob(os.path.join(src_dir, "*", "*.jpg")): #"*" is for subdirectory
shutil.copy(mediafile, dst_dir)
based on other answers this is my current working implementation, which retrieves nested xml files in a root directory:
files = []
for root, dirnames, filenames in os.walk(myDir):
files.extend(glob.glob(root + "/*.xml"))
I'm really having fun with python :)
For python 3.5 and later
import glob
#file_names_array = glob.glob('path/*.c', recursive=True)
#above works for files directly at path/ as guided by NeStack
#updated version
file_names_array = glob.glob('path/**/*.c', recursive=True)
further you might need
for full_path_in_src in file_names_array:
print (full_path_in_src ) # be like 'abc/xyz.c'
#Full system path of this would be like => 'path till src/abc/xyz.c'
Johan and Bruno provide excellent solutions on the minimal requirement as stated. I have just released Formic which implements Ant FileSet and Globs which can handle this and more complicated scenarios. An implementation of your requirement is:
import formic
fileset = formic.FileSet(include="/src/**/*.c")
for file_name in fileset.qualified_files():
print file_name
Another way to do it using just the glob module. Just seed the rglob method with a starting base directory and a pattern to match and it will return a list of matching file names.
import glob
import os
def _getDirs(base):
return [x for x in glob.iglob(os.path.join( base, '*')) if os.path.isdir(x) ]
def rglob(base, pattern):
list = []
list.extend(glob.glob(os.path.join(base,pattern)))
dirs = _getDirs(base)
if len(dirs):
for d in dirs:
list.extend(rglob(os.path.join(base,d), pattern))
return list
Or with a list comprehension:
>>> base = r"c:\User\xtofl"
>>> binfiles = [ os.path.join(base,f)
for base, _, files in os.walk(root)
for f in files if f.endswith(".jpg") ]
If the files are on a remote file system or inside an archive, you can use an implementation of the fsspec AbstractFileSystem class. For example, to list all the files in a zipfile:
from fsspec.implementations.zip import ZipFileSystem
fs = ZipFileSystem("/tmp/test.zip")
fs.glob("/**") # equivalent: fs.find("/")
or to list all the files in a publicly available S3 bucket:
from s3fs import S3FileSystem
fs_s3 = S3FileSystem(anon=True)
fs_s3.glob("noaa-goes16/ABI-L1b-RadF/2020/045/**") # or use fs_s3.find
you can also use it for a local filesystem, which may be interesting if your implementation should be filesystem-agnostic:
from fsspec.implementations.local import LocalFileSystem
fs = LocalFileSystem()
fs.glob("/tmp/test/**")
Other implementations include Google Cloud, Github, SFTP/SSH, Dropbox, and Azure. For details, see the fsspec API documentation.
Just made this.. it will print files and directory in hierarchical way
But I didn't used fnmatch or walk
#!/usr/bin/python
import os,glob,sys
def dirlist(path, c = 1):
for i in glob.glob(os.path.join(path, "*")):
if os.path.isfile(i):
filepath, filename = os.path.split(i)
print '----' *c + filename
elif os.path.isdir(i):
dirname = os.path.basename(i)
print '----' *c + dirname
c+=1
dirlist(i,c)
c-=1
path = os.path.normpath(sys.argv[1])
print(os.path.basename(path))
dirlist(path)
That one uses fnmatch or regular expression:
import fnmatch, os
def filepaths(directory, pattern):
for root, dirs, files in os.walk(directory):
for basename in files:
try:
matched = pattern.match(basename)
except AttributeError:
matched = fnmatch.fnmatch(basename, pattern)
if matched:
yield os.path.join(root, basename)
# usage
if __name__ == '__main__':
from pprint import pprint as pp
import re
path = r'/Users/hipertracker/app/myapp'
pp([x for x in filepaths(path, re.compile(r'.*\.py$'))])
pp([x for x in filepaths(path, '*.py')])
In addition to the suggested answers, you can do this with some lazy generation and list comprehension magic:
import os, glob, itertools
results = itertools.chain.from_iterable(glob.iglob(os.path.join(root,'*.c'))
for root, dirs, files in os.walk('src'))
for f in results: print(f)
Besides fitting in one line and avoiding unnecessary lists in memory, this also has the nice side effect, that you can use it in a way similar to the ** operator, e.g., you could use os.path.join(root, 'some/path/*.c') in order to get all .c files in all sub directories of src that have this structure.
This is a working code on Python 2.7. As part of my devops work, I was required to write a script which would move the config files marked with live-appName.properties to appName.properties. There could be other extension files as well like live-appName.xml.
Below is a working code for this, which finds the files in the given directories (nested level) and then renames (moves) it to the required filename
def flipProperties(searchDir):
print "Flipping properties to point to live DB"
for root, dirnames, filenames in os.walk(searchDir):
for filename in fnmatch.filter(filenames, 'live-*.*'):
targetFileName = os.path.join(root, filename.split("live-")[1])
print "File "+ os.path.join(root, filename) + "will be moved to " + targetFileName
shutil.move(os.path.join(root, filename), targetFileName)
This function is called from a main script
flipProperties(searchDir)
Hope this helps someone struggling with similar issues.
Simplified version of Johan Dahlin's answer, without fnmatch.
import os
matches = []
for root, dirnames, filenames in os.walk('src'):
matches += [os.path.join(root, f) for f in filenames if f[-2:] == '.c']
Here is my solution using list comprehension to search for multiple file extensions recursively in a directory and all subdirectories:
import os, glob
def _globrec(path, *exts):
""" Glob recursively a directory and all subdirectories for multiple file extensions
Note: Glob is case-insensitive, i. e. for '\*.jpg' you will get files ending
with .jpg and .JPG
Parameters
----------
path : str
A directory name
exts : tuple
File extensions to glob for
Returns
-------
files : list
list of files matching extensions in exts in path and subfolders
"""
dirs = [a[0] for a in os.walk(path)]
f_filter = [d+e for d in dirs for e in exts]
return [f for files in [glob.iglob(files) for files in f_filter] for f in files]
my_pictures = _globrec(r'C:\Temp', '\*.jpg','\*.bmp','\*.png','\*.gif')
for f in my_pictures:
print f
import sys, os, glob
dir_list = ["c:\\books\\heap"]
while len(dir_list) > 0:
cur_dir = dir_list[0]
del dir_list[0]
list_of_files = glob.glob(cur_dir+'\\*')
for book in list_of_files:
if os.path.isfile(book):
print(book)
else:
dir_list.append(book)
I modified the top answer in this posting.. and recently created this script which will loop through all files in a given directory (searchdir) and the sub-directories under it... and prints filename, rootdir, modified/creation date, and size.
Hope this helps someone... and they can walk the directory and get fileinfo.
import time
import fnmatch
import os
def fileinfo(file):
filename = os.path.basename(file)
rootdir = os.path.dirname(file)
lastmod = time.ctime(os.path.getmtime(file))
creation = time.ctime(os.path.getctime(file))
filesize = os.path.getsize(file)
print "%s**\t%s\t%s\t%s\t%s" % (rootdir, filename, lastmod, creation, filesize)
searchdir = r'D:\Your\Directory\Root'
matches = []
for root, dirnames, filenames in os.walk(searchdir):
## for filename in fnmatch.filter(filenames, '*.c'):
for filename in filenames:
## matches.append(os.path.join(root, filename))
##print matches
fileinfo(os.path.join(root, filename))
Here is a solution that will match the pattern against the full path and not just the base filename.
It uses fnmatch.translate to convert a glob-style pattern into a regular expression, which is then matched against the full path of each file found while walking the directory.
re.IGNORECASE is optional, but desirable on Windows since the file system itself is not case-sensitive. (I didn't bother compiling the regex because docs indicate it should be cached internally.)
import fnmatch
import os
import re
def findfiles(dir, pattern):
patternregex = fnmatch.translate(pattern)
for root, dirs, files in os.walk(dir):
for basename in files:
filename = os.path.join(root, basename)
if re.search(patternregex, filename, re.IGNORECASE):
yield filename
I needed a solution for python 2.x that works fast on large directories.
I endet up with this:
import subprocess
foundfiles= subprocess.check_output("ls src/*.c src/**/*.c", shell=True)
for foundfile in foundfiles.splitlines():
print foundfile
Note that you might need some exception handling in case ls doesn't find any matching file.

Categories