What is the best way to get a list of all files in a directory, sorted by date [created | modified], using python, on a windows machine?
I've done this in the past for a Python script to determine the last updated files in a directory:
import glob
import os
search_dir = "/mydir/"
# remove anything from the list that is not a file (directories, symlinks)
# thanks to J.F. Sebastian for pointing out that the requirement was a list
# of files (presumably not including directories)
files = list(filter(os.path.isfile, glob.glob(search_dir + "*")))
files.sort(key=lambda x: os.path.getmtime(x))
That should do what you're looking for based on file mtime.
EDIT: Note that you can also use os.listdir() in place of glob.glob() if desired - the reason I used glob in my original code was that I wanted to search only for files with a particular set of file extensions, which glob() was better suited to. To use listdir, here's what it would look like:
import os
search_dir = "/mydir/"
os.chdir(search_dir)
files = filter(os.path.isfile, os.listdir(search_dir))
files = [os.path.join(search_dir, f) for f in files] # add path to each file
files.sort(key=lambda x: os.path.getmtime(x))
Update: to sort dirpath's entries by modification date in Python 3:
import os
from pathlib import Path
paths = sorted(Path(dirpath).iterdir(), key=os.path.getmtime)
(put @Pygirl's answer here for greater visibility)
If you already have a list of filenames, files, then to sort it in place by creation time on Windows (make sure the list contains absolute paths):
files.sort(key=os.path.getctime)
You could get the list of files, for example, using glob as shown in @Jay's answer.
old answer
Here's a more verbose version of @Greg Hewgill's answer. It conforms most closely to the question's requirements. It makes a distinction between creation and modification dates (at least on Windows).
#!/usr/bin/env python
from stat import S_ISREG, ST_CTIME, ST_MODE
import os, sys, time
# path to the directory (relative or absolute)
dirpath = sys.argv[1] if len(sys.argv) == 2 else r'.'
# get all entries in the directory w/ stats
entries = (os.path.join(dirpath, fn) for fn in os.listdir(dirpath))
entries = ((os.stat(path), path) for path in entries)
# leave only regular files, insert creation date
entries = ((stat[ST_CTIME], path)
           for stat, path in entries if S_ISREG(stat[ST_MODE]))
#NOTE: on Windows `ST_CTIME` is a creation date
# but on Unix it could be something else
#NOTE: use `ST_MTIME` to sort by a modification date
for cdate, path in sorted(entries):
    print(time.ctime(cdate), os.path.basename(path))
Example:
$ python stat_creation_date.py
Thu Feb 11 13:31:07 2009 stat_creation_date.py
There is an os.path.getmtime function that gives the number of seconds since the epoch
and should be faster than os.stat.
import os
os.chdir(directory)
sorted(filter(os.path.isfile, os.listdir('.')), key=os.path.getmtime)
Here's my version:
import os

def getfiles(dirpath):
    a = [s for s in os.listdir(dirpath)
         if os.path.isfile(os.path.join(dirpath, s))]
    a.sort(key=lambda s: os.path.getmtime(os.path.join(dirpath, s)))
    return a
First, we build a list of the file names. isfile() is used to skip directories; it can be omitted if directories should be included. Then, we sort the list in place, using the modification time as the key.
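For illustration, a minimal usage sketch of the getfiles() helper above (the directory argument here is just an example):
files = getfiles(".")  # names sorted oldest -> newest by mtime
if files:
    print("Oldest:", files[0])
    print("Newest:", files[-1])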
Here's a one-liner:
import os
import time
from pprint import pprint
pprint([(x[0], time.ctime(x[1].st_ctime)) for x in sorted([(fn, os.stat(fn)) for fn in os.listdir(".")], key = lambda x: x[1].st_ctime)])
This calls os.listdir() to get a list of the filenames, then calls os.stat() for each one to get the creation time, then sorts against the creation time.
Note that this method only calls os.stat() once for each file, which will be more efficient than calling it for each comparison in a sort.
In Python 3.5+:
from pathlib import Path
sorted(Path('.').iterdir(), key=lambda f: f.stat().st_mtime)
Without changing directory:
import os
path = '/path/to/files/'
name_list = os.listdir(path)
full_list = [os.path.join(path,i) for i in name_list]
time_sorted_list = sorted(full_list, key=os.path.getmtime)
print(time_sorted_list)
# if you want just the filenames sorted, simply remove the dir from each
sorted_filename_list = [ os.path.basename(i) for i in time_sorted_list]
print(sorted_filename_list)
from pathlib import Path
import os
sorted(Path('./').iterdir(), key=lambda t: t.stat().st_mtime)
or
sorted(Path('./').iterdir(), key=os.path.getmtime)
or
sorted(os.scandir('./'), key=lambda t: t.stat().st_mtime)
where mtime is the modification time.
Here's my answer using glob without filter, if you want to read files with a certain extension in date order (Python 3):
import glob
import os

dataset_path = '/mydir/'
files = glob.glob(dataset_path+"/morepath/*.extension")
files.sort(key=os.path.getmtime)
# *** the shortest and best way ***
# getmtime --> sort by modified time
# getctime --> sort by created time
import glob,os
lst_files = glob.glob("*.txt")
lst_files.sort(key=os.path.getmtime)
print("\n".join(lst_files))
sorted(filter(os.path.isfile, os.listdir('.')),
       key=lambda p: os.stat(p).st_mtime)
You could use next(os.walk('.'))[-1] instead of filtering with os.path.isfile, but that leaves dead symlinks in the list, and os.stat will fail on them.
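For illustration only, a hedged sketch of that os.walk variant that guards against dead symlinks before calling stat (the helper name and the try/except fallback are my own additions, not part of the original answer):
import os

# next(os.walk('.')) yields (dirpath, dirnames, filenames) for the top directory;
# [-1] keeps only the filenames, which may include names that are dead symlinks.
names = next(os.walk('.'))[-1]

def mtime_or_zero(name):
    # Dead symlinks make os.stat raise OSError; push them to the front instead.
    try:
        return os.stat(name).st_mtime
    except OSError:
        return 0.0

files = sorted(names, key=mtime_or_zero)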
For completeness, using os.scandir (about 2x faster than pathlib):
import os
sorted(os.scandir('/tmp/test'), key=lambda d: d.stat().st_mtime)
Here is a basic example for learning:
import os, sys
import time

dirpath = sys.argv[1] if len(sys.argv) == 2 else r'.'
os.chdir(dirpath)
for i in os.listdir(dirpath):
    data_001 = os.path.realpath(i)
    listdir_stat = os.stat(data_001)
    print(time.ctime(listdir_stat.st_ctime), data_001)
Alex Coventry's answer will produce an exception if the file is a symlink to a nonexistent file; the following code corrects that answer:
import os
import time
from datetime import datetime

sorted(filter(os.path.isfile, os.listdir('.')),
       key=lambda p: os.path.exists(p) and os.stat(p).st_mtime
                     or time.mktime(datetime.now().timetuple()))
When the file doesn't exist, now() is used, and the symlink will go at the very end of the list.
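If you prefer something more explicit than the and/or trick, a hedged restatement of the same fallback (my own sketch, not the original answer's code) could look like this:
import os
import time

def mtime_or_now(p):
    # Broken symlinks make os.stat fail; fall back to "now" so they sort to the end.
    try:
        return os.stat(p).st_mtime
    except OSError:
        return time.time()

files = sorted(os.listdir('.'), key=mtime_or_now)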
This was my version:
import os
folder_path = r'D:\Movies\extra\new\dramas' # your path
os.chdir(folder_path) # make the path active
x = sorted(os.listdir(), key=os.path.getctime)  # sorted using creation time

for name in x:
    print(name)  # print every name inside folder_path
Here is a simple couple of lines that filters by extension and provides a sort option:
import os
import re

def get_sorted_files(src_dir, regex_ext='.*', sort_reverse=False):
    # regex_ext is a regular-expression fragment for the extension, e.g. 'txt' or 'txt|csv'
    files_to_evaluate = [os.path.join(src_dir, f) for f in os.listdir(src_dir)
                         if re.search(r'.*\.({})$'.format(regex_ext), f)]
    files_to_evaluate.sort(key=os.path.getmtime, reverse=sort_reverse)
    return files_to_evaluate
Add the file directory/folder to path; if you want a specific file type, add the file extension, and then get the file names in chronological order.
This works for me.
import glob, os
path = os.path.expanduser(file_location+"/"+date_file)
os.chdir(path)
saved_file=glob.glob('*.xlsx')
saved_file.sort(key=os.path.getmtime)
print(saved_file)
os.listdir may appear to return entries sorted by last-modified time in reverse on some systems, but the docs say its order is arbitrary, so don't rely on this:
import os
last_modified=os.listdir()[::-1]
Maybe you should use shell commands. In Unix/Linux, find piped with sort will probably be able to do what you want.
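As an illustration only (a hedged sketch, not part of the original answer), you could shell out to find and sort from Python on a Unix-like system; the options below assume GNU findutils and may differ on BSD/macOS:
import subprocess

# List regular files directly under /mydir, oldest first, using GNU find's %T@ (epoch mtime).
cmd = "find /mydir -maxdepth 1 -type f -printf '%T@ %p\\n' | sort -n | cut -d' ' -f2-"
print(subprocess.run(cmd, shell=True, capture_output=True, text=True).stdout)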
Related
I need to get the latest file of a folder using python. While using the code:
max(files, key = os.path.getctime)
I am getting the below error:
FileNotFoundError: [WinError 2] The system cannot find the file specified: 'a'
Whatever is assigned to the files variable is incorrect. Use the following code.
import glob
import os
list_of_files = glob.glob('/path/to/folder/*')  # * means all; if you need a specific format, then use e.g. *.csv
latest_file = max(list_of_files, key=os.path.getctime)
print(latest_file)
max(files, key = os.path.getctime)
is quite incomplete code. What is files? It probably is a list of file names, coming out of os.listdir().
But this list contains only the filename parts (a.k.a. "basenames"), because their path is common. In order to use it correctly, you have to combine each name with the path that was used to obtain it.
Such as (untested):
def newest(path):
    files = os.listdir(path)
    paths = [os.path.join(path, basename) for basename in files]
    return max(paths, key=os.path.getctime)
I lack the reputation to comment, but ctime from Marlon Abeykoon's response did not give the correct result for me. Using mtime does the trick, though (key=os.path.getmtime).
import glob
import os
list_of_files = glob.glob('/path/to/folder/*')  # * means all; if you need a specific format, then use e.g. *.csv
latest_file = max(list_of_files, key=os.path.getmtime)
print(latest_file)
I found two answers for that problem:
python os.path.getctime max does not return latest
Difference between python - getmtime() and getctime() in unix system
I would suggest using glob.iglob() instead of glob.glob(), as it is more efficient.
glob.iglob() returns an iterator which yields the same values as glob() without actually storing them all simultaneously.
Which means glob.iglob() will be more efficient.
I mostly use the code below to find the latest file matching my pattern:
LatestFile = max(glob.iglob(fileNamePattern), key=os.path.getctime)
NOTE:
There are variants of the max function. For finding the latest file, we will be using the variant below:
max(iterable, *[, key, default])
which needs an iterable, so your first parameter should be an iterable.
For finding the max of plain numbers, we can use the other variant: max(num1, num2, num3, *args[, key])
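A small illustrative example of the two call forms (purely for demonstration; the filenames and numbers are made up):
import os

# Iterable form: one iterable plus a key function.
longest = max(["a.txt", "longer_name.txt"], key=len)  # -> 'longer_name.txt'

# Positional form: several plain values.
biggest = max(3, 7, 5)  # -> 7

# Applied to files, the iterable form with os.path.getctime picks the newest entry:
# latest_file = max(list_of_paths, key=os.path.getctime)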
I've been using this in Python 3, including pattern matching on the filename.
from pathlib import Path

def latest_file(path: Path, pattern: str = "*"):
    files = path.glob(pattern)
    return max(files, key=lambda x: x.stat().st_ctime)
Try sorting items by creation time. The example below sorts the files in a folder and gets the first element, which is the latest.
import glob
import os
files_path = os.path.join(folder, '*')
files = sorted(
    glob.iglob(files_path), key=os.path.getctime, reverse=True)
print(files[0])
Most of the answers are correct, but if there is a requirement such as getting the latest two or three files, they could fail or need modification.
I found the sample below more useful and relevant, since the same code can get the latest 2, 3, or n files too.
import glob
import os
folder_path = "/Users/sachin/Desktop/Files/"
files_path = os.path.join(folder_path, '*')
files = sorted(glob.iglob(files_path), key=os.path.getctime, reverse=True)
print(files[0])  # latest file
print(files[0], files[1])  # latest two files
A much faster method on Windows (0.05 s) is to call a bat script that does this:
get_latest.bat
@echo off
for /f %%i in ('dir \\directory\in\question /b/a-d/od/t:c') do set LAST=%%i
echo %LAST%
where \\directory\in\question is the directory you want to investigate.
get_latest.py
from subprocess import Popen, PIPE
p = Popen("get_latest.bat", shell=True, stdout=PIPE,)
stdout, stderr = p.communicate()
print(stdout, stderr)
If it finds a file, stdout is the path and stderr is None.
Use stdout.decode("utf-8").rstrip() to get the usable string representation of the file name.
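For example, a hedged usage sketch (assuming the same get_latest.bat as above):
from subprocess import Popen, PIPE

p = Popen("get_latest.bat", shell=True, stdout=PIPE)
stdout, _ = p.communicate()
latest_name = stdout.decode("utf-8").rstrip()  # plain string, trailing newline removed
print(latest_name)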
(Edited to improve answer)
First define a function get_latest_file
def get_latest_file(path, *paths):
    fullpath = os.path.join(path, *paths)
    ...

get_latest_file('example', 'files', 'randomtext011.*.txt')
You may also use a docstring!
def get_latest_file(path, *paths):
    """Returns the name of the latest (most recent) file
    of the joined path(s)"""
    fullpath = os.path.join(path, *paths)
If you use Python 3, you can use iglob instead.
Complete code to return the name of latest file:
def get_latest_file(path, *paths):
    """Returns the name of the latest (most recent) file
    of the joined path(s)"""
    fullpath = os.path.join(path, *paths)
    files = glob.glob(fullpath)  # You may use iglob in Python3
    if not files:                # I prefer using the negation
        return None              # because it behaves like a shortcut
    latest_file = max(files, key=os.path.getctime)
    _, filename = os.path.split(latest_file)
    return filename
I tried to use the above suggestions and my program crashed; then I figured out that the file I was trying to identify was in use, and os.path.getctime crashed when called on it.
What finally worked for me was:
import glob
import os

files_before = glob.glob(os.path.join(my_path, '*'))

# **code where new file is created**

new_file = set(files_before).symmetric_difference(set(glob.glob(os.path.join(my_path, '*'))))
This code gets the uncommon objects between the two sets of file lists.
It's not the most elegant, and if multiple files are created at the same time it probably won't be stable.
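If more than one file can appear in between, a hedged follow-up sketch (my own addition, reusing the getctime key from earlier answers and the same my_path placeholder) would pick the newest of the new entries:
import glob
import os

before = set(glob.glob(os.path.join(my_path, '*')))
# ... code where the new files are created ...
new_files = set(glob.glob(os.path.join(my_path, '*'))) - before
if new_files:
    newest = max(new_files, key=os.path.getctime)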
My python script executes an os.listdir(path) where the path is a queue containing archives that I need to treat one by one.
The problem is that I'm getting the list in an array and then I just do a simple array.pop(0). It was working fine until I put the project in subversion. Now I get the .svn folder in my array and of course it makes my application crash.
So here is my question: is there a function that ignores hidden files when executing an os.listdir() and if not what would be the best way?
You can write one yourself:
import os
def listdir_nohidden(path):
    for f in os.listdir(path):
        if not f.startswith('.'):
            yield f
Or you can use a glob:
import glob
import os
def listdir_nohidden(path):
    return glob.glob(os.path.join(path, '*'))
Either of these will ignore all filenames beginning with '.'.
This is an old question, but it seems to be missing the obvious answer of using a list comprehension, so I'm adding it here for completeness:
[f for f in os.listdir(path) if not f.startswith('.')]
As a side note, the docs state listdir will return results in 'arbitrary order', but a common use case is to have them sorted alphabetically. If you want the directory contents alphabetically sorted without regard to capitalization, you can use:
sorted((f for f in os.listdir() if not f.startswith(".")), key=str.lower)
(Edited to use key=str.lower instead of a lambda)
On Windows, Linux and OS X:
if os.name == 'nt':
    import win32api, win32con

def folder_is_hidden(p):
    if os.name == 'nt':
        attribute = win32api.GetFileAttributes(p)
        return attribute & (win32con.FILE_ATTRIBUTE_HIDDEN | win32con.FILE_ATTRIBUTE_SYSTEM)
    else:
        return p.startswith('.')  # linux-osx
Joshmaker has the right solution to your question.
How to ignore hidden files using os.listdir()?
In Python 3 however, it is recommended to use pathlib instead of os.
from pathlib import Path
visible_files = [
file for file in Path(".").iterdir() if not file.name.startswith(".")
]
glob:
>>> import glob
>>> glob.glob('*')
(glob uses listdir and fnmatch under the hood, but it also explicitly checks for a leading '.', rather than relying on fnmatch.)
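A quick illustration (hedged; the exact output depends on the directory contents): patterns that don't start with a dot skip hidden names, so an explicit '.*' pattern is needed to see them:
import glob

print(glob.glob('*'))   # visible entries only; names starting with '.' are skipped
print(glob.glob('.*'))  # hidden entries (e.g. .svn, .DS_Store), if any exist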
I think it is too much work to go through all of the items in a loop. I would prefer something simpler, like this:
lst = os.listdir(path)
if '.DS_Store' in lst:
lst.remove('.DS_Store')
If the directory contains more than one hidden file, then this can help:
all_files = os.popen('ls -1').read()
lst = all_files.split('\n')
For platform independence, as @Josh mentioned, glob works well:
import glob
glob.glob('*')
filenames = (f.name for f in os.scandir() if not f.name.startswith('.'))
You can just use a simple for loop that will exclude any file or directory whose name starts with ".".
Code for professionals:
import os
directory_things = [i for i in os.listdir() if i[0] != "."] # Exclude all with . in the start
Code for noobs
items_in_directory = os.listdir()
final_items_in_directory = []
for i in items_in_directory:
    if i[0] != ".":  # If the item doesn't start with '.'
        final_items_in_directory.append(i)
Consider the following piece of code:
files = sorted(os.listdir('dumps'), key=os.path.getctime)
The objective is to sort the listed files based on creation time. However, since os.listdir gives only the filename and not the absolute path, the key function, os.path.getctime, throws an exception:
OSError: [Errno 2] No such file or directory: 'very_important_file.txt'
Is there a workaround to this situation or do I need to write my own sort function?
You can use glob.
import os
from glob import glob
glob_pattern = os.path.join('dumps', '*')
files = sorted(glob(glob_pattern), key=os.path.getctime)
files = sorted(os.listdir('dumps'), key=lambda fn:os.path.getctime(os.path.join('dumps', fn)))
files = sorted([os.path.join('dumps', file) for file in os.listdir('dumps')], key=os.path.getctime)
Getting a list of absolute paths for all files in a directory using pathlib in Python 3.9 on Windows:
from pathlib import Path
# directory name is 'dumps'
[str(child.resolve()) for child in Path.iterdir(Path('dumps'))]
Path.iterdir() takes in a pathlib object, and so we do Path(dir) to get that object. It then spits out each file as the child, but as a relative path. child.resolve() gives the absolute path, but again as a pathlib object, so we do str() on it to return a list of strings.
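To tie this back to the original question, a hedged sketch combining this with the getctime key used above:
import os
from pathlib import Path

# Absolute paths for everything in 'dumps', sorted by creation time.
paths = [str(child.resolve()) for child in Path('dumps').iterdir()]
files = sorted(paths, key=os.path.getctime)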
You can also use os.path.join with os.path.abspath, combined with map and lambda in Python.
>>> list(map(lambda x: os.path.join(os.path.abspath('mydir'), x), os.listdir('mydir')))
This will join the absolute path of mydir with os.listdir('mydir').
The output:
['/home/ahmad/Desktop/RedBuffer/Testdata/testing.avi',
'/home/ahmad/Desktop/RedBuffer/Testdata/testing2.avi',
'/home/ahmad/Desktop/RedBuffer/Testdata/text_changing.avi',
'/home/ahmad/Desktop/RedBuffer/Testdata/text_static.avi',
'/home/ahmad/Desktop/RedBuffer/Testdata/test_img.png']
Here is another solution resulting in a NumPy array instead of a list, if that works better for someone. It still uses os:
import numpy as np
import os
# Note: Folder_Path should end with a path separator so the concatenation forms valid paths.
NPFileListFullURL = np.char.add(Folder_Path, os.listdir(Folder_Path))
I have a little task for my company.
I have multiple files whose names start with swale-randomnumber.
I want to copy them to some directory (does shutil.copy allow wildcards?).
Anyway, I then want to choose the largest file, rename it to sync.dat, and then run a program.
I get the logic: I will use a loop to do each individual piece of work, then move on to the next. But I am unsure how to choose the single largest file, or a single file at all for that matter; when I type swale*, surely it will just match them all?
Sorry, I haven't written any source code yet; I am still trying to get my head around how this will work.
Thanks for any help you may provide.
The accepted answer to this question proposes a nice, portable implementation of file copy with wildcard support:
from glob import iglob
from shutil import copy
from os.path import join
def copy_files(src_glob, dst_folder):
    for fname in iglob(src_glob):
        copy(fname, join(dst_folder, fname))
If you want to compare file sizes, you can use either of these functions:
import os
os.path.getsize(path)
os.stat(path).st_size
This might work:
import os
import glob
import shutil

source = "My Source Path"  # Replace these variables with the appropriate data
dest = "My Dest Path"
command = "My command"

# Find the files that need to be copied
files = glob.glob(os.path.join(source, "swale-*"))

# Copy the files to the destination
for file in files:
    shutil.copy(file, dest)

# Create a sorted list of the copied files - using the file sizes,
# biggest first - and then use the 1st item
biggest = sorted(files,
                 key=lambda x: os.path.getsize(os.path.join(dest, os.path.basename(x))),
                 reverse=True)[0]

# Rename that biggest file to swale.dat
shutil.move(os.path.join(dest, os.path.basename(biggest)), os.path.join(dest, "swale.dat"))

# Run the command
os.system(command)
# Only use os.system if you know your command is completely secure and you don't need
# the output. Use the subprocess module if you need more security and need the output.
Note: none of this is tested, but it should work.
import os
from os.path import join, isfile, getsize

directory = '/your/directory/'

# You now have a list of files in directory that start with "swale-"
fileList = [join(directory, f) for f in os.listdir(directory)
            if f.startswith("swale-") and isfile(join(directory, f))]

# Order it by file size - from big to small
fileList.sort(key=getsize, reverse=True)

# First file in the list is the biggest
biggestFile = fileList[0]

# Do whatever you want with these files - using shutil.*, os.*, or anything else...