How to find oldest and newest file in a directory? - python

My code should find the newest and oldest files in a folder and its subfolders. It works for the top-level folder but it doesn't include files within subfolders.
import os
import glob
mypath = 'C:/RDS/*'
print(min(glob.glob(mypath), key=os.path.getmtime))
print(max(glob.glob(mypath), key=os.path.getmtime))
How do I make it recurse into the subfolders?

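Try using pathlib. Also note that getmtime gives the last-modified time; if you want the time the file was created, use getctime instead (on Windows this is the creation time, but on Unix it is the time of the last metadata change, not the creation time).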
if you strictly want only files:
import os
import pathlib
mypath = 'your path'
taggedrootdir = pathlib.Path(mypath)
print(min([f for f in taggedrootdir.resolve().glob('**/*') if f.is_file()], key=os.path.getctime))
print(max([f for f in taggedrootdir.resolve().glob('**/*') if f.is_file()], key=os.path.getctime))
if results may include folders:
import os
import pathlib
mypath = 'your path'
taggedrootdir = pathlib.Path(mypath)
print(min(taggedrootdir.resolve().glob('**/*'), key=os.path.getctime))
print(max(taggedrootdir.resolve().glob('**/*'), key=os.path.getctime))

As the docs show, you can add a recursive=True keyword argument to glob.glob()
so your code becomes:
import os
import glob
# A recursive search needs BOTH the '**' wildcard in the pattern and
# recursive=True; with a plain '*' the flag has no effect and only the
# top-level entries are matched.
mypath = 'C:/RDS/**/*'
# Walk the tree once and reuse the result for both lookups.
all_paths = glob.glob(mypath, recursive=True)
print(min(all_paths, key=os.path.getmtime))
print(max(all_paths, key=os.path.getmtime))
This should give you the oldest and newest file in your folder and all its subfolders.

Pay attention to the os filepath separator: "/" (on unix) vs. "\" (on windows).
You can try something like below.
It saves the file list in a variable, which is faster than traversing the file system twice.
There is one line for debugging, comment it in production.
import os
import glob
# Raw string: '\R' and '\*' are not valid escape sequences, so a plain
# literal triggers an invalid-escape warning on current Python versions.
mypath = r'D:\RDS\**'
allFilesAndFolders = glob.glob(mypath, recursive=True)
# just for debugging
print(allFilesAndFolders)
print(min(allFilesAndFolders, key=os.path.getmtime))
print(max(allFilesAndFolders, key=os.path.getmtime))

Here's a fairly efficient way of doing it. It determines the oldest and newest files by iterating through them all once. Since it uses iteration, there's no need to first create a list of them and go through it twice to determine the two extremes.
import os
import pathlib
def max_min(iterable, keyfunc=None):
    """Return the largest and smallest items of *iterable* in one pass.

    Args:
        iterable: Any iterable of items. It is consumed exactly once, so a
            generator works.
        keyfunc: Optional one-argument function that extracts the comparison
            key from each item. Defaults to comparing the items themselves.

    Returns:
        A ``(most, least)`` tuple: the item with the largest key first, then
        the item with the smallest key. On ties the first item seen wins.

    Raises:
        ValueError: If *iterable* is empty.
    """
    if keyfunc is None:
        keyfunc = lambda x: x  # Identity.
    iterator = iter(iterable)
    try:
        most = least = next(iterator)
    except StopIteration:
        # Mirror min()/max() instead of leaking a bare StopIteration.
        raise ValueError('max_min() arg is an empty iterable') from None
    mostkey = leastkey = keyfunc(most)
    for item in iterator:
        key = keyfunc(item)
        if key > mostkey:
            most = item
            mostkey = key
        elif key < leastkey:
            least = item
            leastkey = key
    return most, least
mypath = '.'
files = (f for f in pathlib.Path(mypath).resolve().glob('**/*') if f.is_file())
# max_min() returns (largest key, smallest key): the largest mtime belongs
# to the newest file, so the newest file is unpacked first.
newest, oldest = max_min(files, keyfunc=os.path.getmtime)
print(f'oldest file: {oldest}')
print(f'newest file: {newest}')

Related

How do i iterate through a directory, and print out the file names and their sizes and make the printout clean looking?

The code I am currently working on enters a directory. Once in that directory, I need to iterate through its files and print the file names and extensions, along with the file size.
os.chdir(Path('pets', 'cats'))
current = Path.cwd()
for file in os.listdir(current):
fileName = os.path.split(file)
fileSize = os.path.getsize(file)
print(str(fileName) + ': ' + str(fileSize))
The issue I am having is that the printout includes ('' '<filename.ext>'). I want to omit all these extra characters and just have <filename.ext>. Any clues on how I can clean this up?
It looks like you're using the pathlib module, so you can write:
import os
from pathlib import Path
# Same directory the question changes into: pets/cats.
path = Path('pets', 'cats')
for item in path.iterdir():
    # Skip sub-directories and anything else that is not a regular file.
    if not item.is_file():
        continue
    # item.name is the bare "<filename.ext>" with no directory part.
    print(f'{item.name}: {item.stat().st_size}')
This would yield output like:
foo.txt: 32
bar.txt: 64
You could use this piece of code for the task:
import glob
import os
folPath = r'dir_address'
# Build the pattern with os.path.join rather than a hand-written "\**\*":
# '\*' is an invalid escape sequence and the backslash is Windows-only.
for fPath in glob.glob(os.path.join(folPath, '**', '*'), recursive=True):
    # get file size in KiB (st_size is reported in bytes)
    fSize = os.stat(fPath).st_size/1024
    print('size of {0} is {1}'.format(fPath, fSize))
I hope this helps you.

How to loop and optimise data extraction - Python [duplicate]

I need to iterate through all .asm files inside a given directory and do some actions on them.
How can this be done in an efficient way?
Python 3.6 version of the above answer, using os - assuming that you have the directory path as a str object in a variable called directory_in_str:
import os
directory = os.fsencode(directory_in_str)
for file in os.listdir(directory):
filename = os.fsdecode(file)
if filename.endswith(".asm") or filename.endswith(".py"):
# print(os.path.join(directory, filename))
continue
else:
continue
Or recursively, using pathlib:
from pathlib import Path
pathlist = Path(directory_in_str).glob('**/*.asm')
for path in pathlist:
# because path is object not string
path_in_str = str(path)
# print(path_in_str)
Use rglob to replace glob('**/*.asm') with rglob('*.asm')
This is like calling Path.glob() with '**/' added in front of the given relative pattern:
from pathlib import Path
pathlist = Path(directory_in_str).rglob('*.asm')
for path in pathlist:
# because path is object not string
path_in_str = str(path)
# print(path_in_str)
Original answer:
import os
for filename in os.listdir("/path/to/dir/"):
if filename.endswith(".asm") or filename.endswith(".py"):
# print(os.path.join(directory, filename))
continue
else:
continue
This will iterate over all descendant files, not just the immediate children of the directory:
import os
for subdir, dirs, files in os.walk(rootdir):
for file in files:
#print os.path.join(subdir, file)
filepath = subdir + os.sep + file
if filepath.endswith(".asm"):
print (filepath)
You can try using glob module:
import glob
for filepath in glob.iglob('my_dir/*.asm'):
print(filepath)
and since Python 3.5 you can search subdirectories as well:
glob.glob('**/*.txt', recursive=True) # => ['2.txt', 'sub/3.txt']
From the docs:
The glob module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell, although results are returned in arbitrary order. No tilde expansion is done, but *, ?, and character ranges expressed with [] will be correctly matched.
Since Python 3.5, things are much easier with os.scandir() and 2-20x faster (source):
with os.scandir(path) as it:
for entry in it:
if entry.name.endswith(".asm") and entry.is_file():
print(entry.name, entry.path)
Using scandir() instead of listdir() can significantly increase the
performance of code that also needs file type or file attribute
information, because os.DirEntry objects expose this information if
the operating system provides it when scanning a directory. All
os.DirEntry methods may perform a system call, but is_dir() and
is_file() usually only require a system call for symbolic links;
os.DirEntry.stat() always requires a system call on Unix but only
requires one for symbolic links on Windows.
Python 3.4 and later offer pathlib in the standard library. You could do:
from pathlib import Path
asm_pths = [pth for pth in Path.cwd().iterdir()
if pth.suffix == '.asm']
Or if you don't like list comprehensions:
# The list must be created under the same name the loop appends to.
asm_pths = []
for pth in Path.cwd().iterdir():
    if pth.suffix == '.asm':
        asm_pths.append(pth)
Path objects can easily be converted to strings.
Here's how I iterate through files in Python:
import os
path = 'the/name/of/your/path'
folder = os.fsencode(path)
filenames = []
for file in os.listdir(folder):
filename = os.fsdecode(file)
if filename.endswith( ('.jpeg', '.png', '.gif') ): # whatever file types you're using...
filenames.append(filename)
filenames.sort() # now you have the filenames and can do something with them
NONE OF THESE TECHNIQUES GUARANTEE ANY ITERATION ORDERING
Yup, super unpredictable. Notice that I sort the filenames, which is important if the order of the files matters, i.e. for video frames or time dependent data collection. Be sure to put indices in your filenames though!
You can use glob for referring the directory and the list :
import glob
import os
#to get the current working directory name
cwd = os.getcwd()
#Load the images from images folder.
for f in glob.glob('images\*.jpg'):
dir_name = get_dir_name(f)
image_file_name = dir_name + '.jpg'
#To print the file name with path (path will be in string)
print (image_file_name)
To get the list of all directory in array you can use os :
os.listdir(directory)
I'm not quite happy with this implementation yet, I wanted to have a custom constructor that does DirectoryIndex._make(next(os.walk(input_path))) such that you can just pass the path you want a file listing for. Edits welcome!
import collections
import os
DirectoryIndex = collections.namedtuple('DirectoryIndex', ['root', 'dirs', 'files'])
# os.walk yields (root, dirs, files); the first tuple describes '.' itself.
index = DirectoryIndex(*next(os.walk('.')))
for file_name in index.files:
    # Join with the walked root; the listing holds bare file names only.
    file_path = os.path.join(index.root, file_name)
I really like using the scandir directive that is built into the os library. Here is a working example:
import os
i = 0
with os.scandir('/usr/local/bin') as root_dir:
    for path in root_dir:
        if path.is_file():
            i += 1
            # path is an os.DirEntry: .path is the full path, .name the base name.
            print(f"Full path is: {path.path} and just the name is: {path.name}")
print(f"{i} files scanned successfully.")
Get all the .asm files in a directory by doing this.
import os
path = "path_to_file"
file_type = '.asm'
for filename in os.listdir(path=path):
if filename.endswith(file_type):
print(filename)
print(f"{path}/{filename}")
# do something below
I don't understand why some answers are complicated. This is how I would do it with Python 2.7. Replace DIRECTORY_TO_LOOP with the directory you want to use.
import os
DIRECTORY_TO_LOOP = '/var/www/files/'
for root, dirs, files in os.walk(DIRECTORY_TO_LOOP, topdown=False):
for name in files:
print(os.path.join(root, name))

How to delete a file by extension in Python?

I was messing around just trying to make a script that deletes items by ".zip" extension.
import sys
import os
from os import listdir
test=os.listdir("/Users/ben/downloads/")
for item in test:
if item.endswith(".zip"):
os.remove(item)
Whenever I run the script I get:
OSError: [Errno 2] No such file or directory: 'cities1000.zip'
cities1000.zip is obviously a file in my downloads folder.
What did I do wrong here? Is the issue that os.remove requires the full path to the file? If this is the issue, then how can I do that in this current script without completely rewriting it?
You can set the path in to a dir_name variable, then use os.path.join for your os.remove.
import os
dir_name = "/Users/ben/downloads/"
test = os.listdir(dir_name)
for item in test:
if item.endswith(".zip"):
os.remove(os.path.join(dir_name, item))
For this operation you need to append the file name on to the file path so the command knows what folder you are looking into.
You can do this correctly and in a portable way in python using the os.path.join command.
For example:
import os
directory = "/Users/ben/downloads/"
test = os.listdir( directory )
for item in test:
if item.endswith(".zip"):
os.remove( os.path.join( directory, item ) )
Alternate approach that avoids join-ing yourself over and over: Use glob module to join once, then let it give you back the paths directly.
import glob
import os
dir = "/Users/ben/downloads/"
for zippath in glob.iglob(os.path.join(dir, '*.zip')):
os.remove(zippath)
I think you could use Pathlib-- a modern way, like the following:
import pathlib
dir = pathlib.Path("/Users/ben/downloads/")
# glob() takes a pattern relative to the directory it is called on,
# not a full path, so pass just "*.zip".
zip_files = dir.glob("*.zip")
for zf in zip_files:
    zf.unlink()
If you want to delete all zip files recursively, just write so:
import pathlib
dir = pathlib.Path("/Users/ben/downloads/")
# rglob() takes a pattern relative to the directory it is called on,
# not a full path, so pass just "*.zip".
zip_files = dir.rglob("*.zip")  # recursively
for zf in zip_files:
    zf.unlink()
Just leaving my two cents on this issue: if you want to be chic you can use glob or iglob from the glob package, like so:
import glob
import os
files_in_dir = glob.glob('/Users/ben/downloads/*.zip')
# or if you want to be fancy, you can use iglob, which returns an iterator:
files_in_dir = glob.iglob('/Users/ben/downloads/*.zip')
for _file in files_in_dir:
print(_file) # just to be sure, you know how it is...
os.remove(_file)
origfolder = "/Users/ben/downloads/"
test = os.listdir(origfolder)
for item in test:
if item.endswith(".zip"):
os.remove(os.path.join(origfolder, item))
The dirname is not included in the os.listdir output. You have to attach it to reference the file from the list returned by said function.
Prepend the directory to the filename
os.remove("/Users/ben/downloads/" + item)
EDIT: or change the current working directory using os.chdir.

How can I iterate over files in a given directory?

I need to iterate through all .asm files inside a given directory and do some actions on them.
How can this be done in an efficient way?
Python 3.6 version of the above answer, using os - assuming that you have the directory path as a str object in a variable called directory_in_str:
import os
directory = os.fsencode(directory_in_str)
for file in os.listdir(directory):
filename = os.fsdecode(file)
if filename.endswith(".asm") or filename.endswith(".py"):
# print(os.path.join(directory, filename))
continue
else:
continue
Or recursively, using pathlib:
from pathlib import Path
pathlist = Path(directory_in_str).glob('**/*.asm')
for path in pathlist:
# because path is object not string
path_in_str = str(path)
# print(path_in_str)
Use rglob to replace glob('**/*.asm') with rglob('*.asm')
This is like calling Path.glob() with '**/' added in front of the given relative pattern:
from pathlib import Path
pathlist = Path(directory_in_str).rglob('*.asm')
for path in pathlist:
# because path is object not string
path_in_str = str(path)
# print(path_in_str)
Original answer:
import os
for filename in os.listdir("/path/to/dir/"):
if filename.endswith(".asm") or filename.endswith(".py"):
# print(os.path.join(directory, filename))
continue
else:
continue
This will iterate over all descendant files, not just the immediate children of the directory:
import os
for subdir, dirs, files in os.walk(rootdir):
for file in files:
#print os.path.join(subdir, file)
filepath = subdir + os.sep + file
if filepath.endswith(".asm"):
print (filepath)
You can try using glob module:
import glob
for filepath in glob.iglob('my_dir/*.asm'):
print(filepath)
and since Python 3.5 you can search subdirectories as well:
glob.glob('**/*.txt', recursive=True) # => ['2.txt', 'sub/3.txt']
From the docs:
The glob module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell, although results are returned in arbitrary order. No tilde expansion is done, but *, ?, and character ranges expressed with [] will be correctly matched.
Since Python 3.5, things are much easier with os.scandir() and 2-20x faster (source):
with os.scandir(path) as it:
for entry in it:
if entry.name.endswith(".asm") and entry.is_file():
print(entry.name, entry.path)
Using scandir() instead of listdir() can significantly increase the
performance of code that also needs file type or file attribute
information, because os.DirEntry objects expose this information if
the operating system provides it when scanning a directory. All
os.DirEntry methods may perform a system call, but is_dir() and
is_file() usually only require a system call for symbolic links;
os.DirEntry.stat() always requires a system call on Unix but only
requires one for symbolic links on Windows.
Python 3.4 and later offer pathlib in the standard library. You could do:
from pathlib import Path
asm_pths = [pth for pth in Path.cwd().iterdir()
if pth.suffix == '.asm']
Or if you don't like list comprehensions:
# The list must be created under the same name the loop appends to.
asm_pths = []
for pth in Path.cwd().iterdir():
    if pth.suffix == '.asm':
        asm_pths.append(pth)
Path objects can easily be converted to strings.
Here's how I iterate through files in Python:
import os
path = 'the/name/of/your/path'
folder = os.fsencode(path)
filenames = []
for file in os.listdir(folder):
filename = os.fsdecode(file)
if filename.endswith( ('.jpeg', '.png', '.gif') ): # whatever file types you're using...
filenames.append(filename)
filenames.sort() # now you have the filenames and can do something with them
NONE OF THESE TECHNIQUES GUARANTEE ANY ITERATION ORDERING
Yup, super unpredictable. Notice that I sort the filenames, which is important if the order of the files matters, i.e. for video frames or time dependent data collection. Be sure to put indices in your filenames though!
You can use glob for referring the directory and the list :
import glob
import os
#to get the current working directory name
cwd = os.getcwd()
#Load the images from images folder.
for f in glob.glob('images\*.jpg'):
dir_name = get_dir_name(f)
image_file_name = dir_name + '.jpg'
#To print the file name with path (path will be in string)
print (image_file_name)
To get the list of all directory in array you can use os :
os.listdir(directory)
I'm not quite happy with this implementation yet, I wanted to have a custom constructor that does DirectoryIndex._make(next(os.walk(input_path))) such that you can just pass the path you want a file listing for. Edits welcome!
import collections
import os
DirectoryIndex = collections.namedtuple('DirectoryIndex', ['root', 'dirs', 'files'])
# os.walk yields (root, dirs, files); the first tuple describes '.' itself.
index = DirectoryIndex(*next(os.walk('.')))
for file_name in index.files:
    # Join with the walked root; the listing holds bare file names only.
    file_path = os.path.join(index.root, file_name)
I really like using the scandir directive that is built into the os library. Here is a working example:
import os
i = 0
with os.scandir('/usr/local/bin') as root_dir:
    for path in root_dir:
        if path.is_file():
            i += 1
            # path is an os.DirEntry: .path is the full path, .name the base name.
            print(f"Full path is: {path.path} and just the name is: {path.name}")
print(f"{i} files scanned successfully.")
Get all the .asm files in a directory by doing this.
import os
path = "path_to_file"
file_type = '.asm'
for filename in os.listdir(path=path):
if filename.endswith(file_type):
print(filename)
print(f"{path}/{filename}")
# do something below
I don't understand why some answers are complicated. This is how I would do it with Python 2.7. Replace DIRECTORY_TO_LOOP with the directory you want to use.
import os
DIRECTORY_TO_LOOP = '/var/www/files/'
for root, dirs, files in os.walk(DIRECTORY_TO_LOOP, topdown=False):
for name in files:
print(os.path.join(root, name))

Find the newest folder in a directory in Python

I am trying to have an automated script that enters into the most recently created folder.
I have some code below
import datetime, os, shutil
today = datetime.datetime.now().isoformat()
file_time = datetime.datetime.fromtimestamp(os.path.getmtime('/folders*'))
if file_time < today:
changedirectory('/folders*')
I am not sure how to get this to check the latest timestamp from now. Any ideas?
Thanks
There is no actual trace of the "time created" in most OS / filesystems: what you get as mtime is the time a file or directory was modified (so for example creating a file in a directory updates the directory's mtime) -- and from ctime, when offered, the time of the latest inode change (so it would be updated by creating or removing a sub-directory).
Assuming you're fine with e.g. "last-modified" (and your use of "created" in the question was just an error), you can find (e.g.) all subdirectories of the current directory:
import os
all_subdirs = [d for d in os.listdir('.') if os.path.isdir(d)]
and get the one with the latest mtime (in Python 2.5 or better):
latest_subdir = max(all_subdirs, key=os.path.getmtime)
If you need to operate elsewhere than the current directory, it's not very different, e.g.:
def all_subdirs_of(b='.'):
    """Return the paths of the immediate sub-directories of *b*.

    Args:
        b: Directory to list; defaults to the current directory.

    Returns:
        A list with one path per sub-directory, each joined onto *b* so the
        results stay valid regardless of the current working directory.
    """
    candidates = (os.path.join(b, d) for d in os.listdir(b))
    return [bd for bd in candidates if os.path.isdir(bd)]
The latest_subdir assignment does not change: given any list of paths as all_subdirs
(be they paths of directories or files), that max call returns the latest-modified one.
One liner to find latest
# Find latest
import os, glob
max(glob.glob(os.path.join(directory, '*/')), key=os.path.getmtime)
One liner to find n'th latest
# Find n'th latest
import os, glob
sorted(glob.glob(os.path.join(directory, '*/')), key=os.path.getmtime)[-n]
And a quick one-liner:
directory = 'some/path/to/the/main/dir'
max([os.path.join(directory,d) for d in os.listdir(directory)], key=os.path.getmtime)
Python Version 3.4+
We can try pathlib and the solution will be one liner
Find latest
import pathlib
# Select directories explicitly: before Python 3.11 a trailing-slash glob
# pattern also matched plain files. Path.stat() is used for the mtime so
# that only pathlib needs to be imported.
max((p for p in pathlib.Path(directory).iterdir() if p.is_dir()),
    key=lambda p: p.stat().st_mtime)
To get nth latest
import pathlib
# Select directories explicitly (before Python 3.11 a trailing-slash glob
# pattern also matched plain files), sort them oldest-to-newest by mtime
# via Path.stat(), then count n back from the end.
sorted((p for p in pathlib.Path(directory).iterdir() if p.is_dir()),
       key=lambda p: p.stat().st_mtime)[-n]
here's one way to find latest directory
import os
import time
import operator
directory = os.path.join("/home", "path")
os.chdir(directory)
# Map each sub-directory's absolute path to its modification timestamp.
alist = {}
for file in os.listdir("."):
    if os.path.isdir(file):
        alist[os.path.join(os.getcwd(), file)] = os.path.getmtime(file)
# The key with the largest (most recent) timestamp is the newest directory;
# max() raises ValueError if there are no sub-directories at all.
latest = max(alist, key=alist.get)
print("newest directory is ", latest)
os.chdir(latest)
import os, datetime, operator
dir = "/"
# Test and stat the name joined onto dir: a bare name from os.listdir()
# would otherwise be resolved against the current working directory.
folders = [
    (f, os.path.getmtime(os.path.join(dir, f)))
    for f in os.listdir(dir)
    if os.path.isdir(os.path.join(dir, f))
]
# The pair with the largest mtime is the most recently modified folder.
(newest_folder, mtime) = max(folders, key=operator.itemgetter(1))

Categories