I'm trying to organize some data before processing it.
What I have is a folder of raw tiff files (they're raster bands from a drone sensor).
I want to move these files into new, individual folders. e.g., IMG_001_1, IMG_001_2, IMG_001_3, IMG_001_4 and IMG_001_5 are all moved into a new folder titled IMG_001. I am ok with changing the naming structure of the files in order to make the code simpler.
An additional issue is that there are a few images missing from the folder. The current files are IMG0016 - IMG0054 (no IMG0055), IMG0056 - IMG0086 (no IMG0087), and IMG0087 - IMG0161. This is why I think it would be simpler to just rename the new image folders from 1-143.
My main problem is actually moving the files into the new folders - creating the folders is fairly simple.
Played around a little and this script came out, which should do what you want:
import os
import shutil
import re
# Source and destination folders (both end with a path separator).
# Every backslash is escaped explicitly: a partially escaped literal contains
# sequences such as "\U", which Python 3 rejects as a malformed unicode
# escape. The resulting values are unchanged.
UNORG = "C:\\Users\\joshuarb\\Desktop\\Unorganized_Images\\"
ORG = "C:\\Users\\joshuarb\\Desktop\\Organized_Images\\"
def main():
    """Move the files of each image number into its own ``IMG_NNNN`` folder.

    The file names in UNORG are read once. For every image number from 0 to
    142 the files selected by get_files_to_move() are printed and then moved
    into the folder ORG + "IMG_NNNN" (number zero-padded to four digits).
    """
    file_names = [os.path.join(UNORG, i) for i in get_files_of(UNORG)]
    for count in range(0, 143):
        current_dir = "{}IMG_{:04d}".format(ORG, count)
        move_files = get_files_to_move(file_names, count)
        # Function-call form prints the same text on Python 2 and Python 3.
        print(move_files)
        if move_files and not os.path.isdir(current_dir):
            # shutil.move() cannot place a file into a folder that is missing,
            # so create the destination before the first move.
            os.makedirs(current_dir)
        for i in move_files:
            shutil.move(i, os.path.join(current_dir, os.path.basename(i)))
def get_files_to_move(file_names, count):
    """Return the entries of file_names that belong to image number count.

    An entry is kept when it matches the regular expression
    ``.*IMG<count>_.*``; the order of file_names is preserved.
    """
    pattern = re.compile('.*IMG{}_.*'.format(count))
    selected = []
    for candidate in file_names:
        if pattern.match(candidate):
            selected.append(candidate)
    return selected
def get_files_of(mypath):
    """Return the names of the files located directly inside mypath.

    Only the top level of mypath is inspected; files in sub-folders are not
    included. An empty list is returned when mypath cannot be walked (for
    example because it does not exist).
    """
    # The built-in next() works on Python 2 and 3 alike, whereas the
    # iterator's .next() method exists only on Python 2. The default covers
    # the case where os.walk() yields nothing at all.
    _dirpath, _dirnames, filenames = next(os.walk(mypath), (mypath, [], []))
    return filenames
if __name__ == '__main__':
As you see, the code is not commented. But feel free to ask if something is unclear;)
Problem solved!
import os
import shutil
srcpath = "C:\Users\joshuarb\Desktop\Python_Test\UnorganizedImages"
srcfiles = os.listdir(srcpath)
destpath = "C:\Users\joshuarb\Desktop\Python_Test\OrganizedImages"
# extract the three letters from filenames and filter out duplicates
destdirs = list(set([filename[0:8] for filename in srcfiles]))
def create(dirname, destpath):
full_path = os.path.join(destpath, dirname)
return full_path
def move(filename, dirpath):
shutil.move(os.path.join(srcpath, filename)
# create destination directories and store their names along with full paths
targets = [
(folder, create(folder, destpath)) for folder in destdirs
for dirname, full_path in targets:
for filename in srcfiles:
if dirname == filename[0:8]:
move(filename, full_path)
I have a folder whose directory structure looks somewhat like this:
I want to move all png to the main folder i.e. Documents.
def recur(input_path):
    """Follow the first sub-directory at every level and return where it ends.

    At each level the entries are examined in os.listdir() order; the first
    one that is a directory is descended into. The path of the first folder
    reached that contains no sub-directory is returned (input_path itself
    when it has none).
    """
    subdirectories = (
        candidate
        for candidate in (
            os.path.join(input_path, entry) for entry in os.listdir(input_path)
        )
        if pathlib.Path(candidate).is_dir()
    )
    first_subdirectory = next(subdirectories, None)
    if first_subdirectory is None:
        return input_path
    return recur(first_subdirectory)
I have some code to get the deepest path inside a folder, but I am not sure how to use the recursive function to achieve what I want.
Any help would be really appreciated, thanks!!
The program below collects all files recursively from the parent directory and copies them into the parent directory.
import os
import glob
import shutil
# Paths of every file found so far; filled in by get_all_files().
files_abs_paths = []


def get_all_files(parent_dir):
    """Append the path of every file below parent_dir to files_abs_paths.

    Folders are descended into recursively; only non-directory entries are
    recorded. Entries whose name starts with a dot are not matched by the
    glob pattern and are therefore skipped.
    """
    # glob.escape keeps wildcard characters in the folder name literal. A
    # single "*" is used because "**" without recursive=True matches the
    # same entries: the direct children only.
    files_n_folders = glob.glob(f'{glob.escape(parent_dir)}/*')
    for fl_or_fldr in files_n_folders:
        if os.path.isdir(fl_or_fldr):
            # A folder: collect whatever it contains.
            get_all_files(fl_or_fldr)
        else:
            # A file: remember its path.
            files_abs_paths.append(fl_or_fldr)
parent_dir = r"C:/Documents"
# get all files recursively in parent dir (this fills files_abs_paths)
get_all_files(parent_dir)
# copies files to parent_dir
for fl in files_abs_paths:
    # gets file_name
    file_name = os.path.basename(fl)
    # create file in parent_dir
    new_file_loc = f'{parent_dir}/{file_name}'
    # Never overwrite: a file already present under that name is left alone.
    if not os.path.exists(new_file_loc):
        shutil.copyfile(fl, new_file_loc)
You can also get all the files from a folder tree using os.walk:
If you don't mind overwriting files with duplicate names:
from os import walk, rename
from os.path import join
def collect_files(root):
    """Move every file found below root into root itself.

    Files that already sit directly in root are left where they are. When a
    moved file has the same name as a file already in root, the existing file
    is overwritten. The emptied sub-folders are not removed.
    """
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename refuses to do so on Windows.
    from os import replace

    for src_path, _, files in walk(root):
        if src_path == root:
            continue
        for name in files:
            replace(join(src_path, name), join(root, name))
If you want to add a number to the end of files with duplicate names:
from os import walk, rename
from os.path import join, splitext, exists
def collect_files(root):
    """Move every file found below root into root without overwriting.

    Files that already sit directly in root stay untouched. When the name of
    a moved file is already taken in root, ``_001``, ``_002``, ... is inserted
    before the extension until an unused name is found.
    """
    for folder, _, names in walk(root):
        if folder == root:
            continue
        for original in names:
            stem, extension = splitext(original)
            target = join(root, original)
            attempt = 0
            while exists(target):
                attempt += 1
                target = join(root, '{}_{:0>3}{}'.format(stem, attempt, extension))
            rename(join(folder, original), target)
The file location is /Users/D/Desktop/Files/1/no.h5, and the same filename (no.h5) appears in each of the folders 1-400. I want to collect all of these files in a single folder, each one renamed after the number of the folder it came from.
Your help is greatly appreciated!
You could use a glob pattern to find the target files to do the move. The pathlib library provides a convenient way to manage the paths.
from pathlib import Path
# No leading space in the path: " /Users/..." names a different, relative
# location, so the glob below would find nothing.
base = Path("/Users/D/Desktop/Files")
target_folder = Path("/some/where/else")
# Every match is <base>/<folder>/no.h5; the folder name becomes the file name.
for target in base.glob("*/no.h5"):
    name = target.parent.name
    # Only folders whose name consists of digits are processed.
    if name.isdigit():
        target.rename(target_folder / (name + ".h5"))
You can use the shutil module (https://docs.python.org/3/library/shutil.html):
import shutil
shutil.copyfile(src_fname, os.path.join(target_dir, new_fname))
You can also walk through your base directory to create a list of files that need to be copied:
import os
dir_base = fr'\Users\D\Desktop\Files'
fname_base = 'no'
fname_suffix = 'h5'
list_files = []
for subdir, dirs, files in os.walk(dir_base):
for file in files:
if file.startswith(fname_base) and file.endswith(fname_suffix):
list_files.append(os.path.join(subdir, file))
The whole code for an example I created on my system looks like this:
import shutil
import os
#dir_base = fr'\Users\D\Desktop\Files' #<============ base directory for your case
dir_base = fr'./test' #<============ base dir for my example
fname_base = 'no'
#fname_suffix = 'h5' #<============ extension of your files
fname_suffix = 'txt' #<============ extension of files in my example
target_base_fname='abc' #<============ base filename for new files in my example
target_dir = fr'./test/tgt' #<============ target dir for my example
list_files = []
for subdir, dirs, files in os.walk(dir_base):
for file in files:
if file.startswith(fname_base) and file.endswith(fname_suffix):
list_files.append(os.path.join(subdir, file))
for elem in list_files:
print(f'processing: {elem}')
this_dir_path = os.path.dirname(elem)
this_dir_name = os.path.basename(this_dir_path)
new_fname = target_base_fname + '_' + this_dir_name + '.' + fname_suffix
shutil.copyfile(elem, os.path.join(target_dir, new_fname))
I have a main directory (root) which contains 6 subdirectories.
I would like to count the number of files present in each sub directory and add all to a simple python list.
For this result : mylist = [497643, 5976, 3698, 12, 456, 745]
I'm blocked on that code:
import os, sys
list = []
# Open a file
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
print (file)
#fill a list with each sub directory number of elements
for sub_dir in dirs:
list = dirs.append(len(sub_dir))
My attempt to fill the list doesn't work, and I have run out of ideas...
Finding a way to iterate sub-directory of a main directory and fill a list with a function applied on each sub directory would sky rocket the speed of my actual data science project!
Thanks for your help
You can use os.path.isfile and os.path.isdir
# One entry per sub-directory of `path`: the number of regular files it holds.
# Each name is joined to its folder before the isfile() test (a bare name
# would be checked against the current working directory), and the True
# results are summed so that only files are counted.
res = [
    sum(
        os.path.isfile(os.path.join(path, name, entry))
        for entry in os.listdir(os.path.join(path, name))
    )
    for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
]
Using the for loop
# One entry per sub-directory of `path`: the number of regular files it holds.
res = []
for name in os.listdir(path):
    dir_path = os.path.join(path, name)
    if os.path.isdir(dir_path):
        # Join each name to its folder before testing it, and sum the True
        # results: taking len() of the mapped values would count every entry,
        # whether or not it is a file.
        res.append(sum(
            os.path.isfile(os.path.join(dir_path, entry))
            for entry in os.listdir(dir_path)
        ))
You need to use os.listdir on each subdirectory. The current code simply takes the length of a filepath.
import os, sys
# Result: number of entries in each sub-directory of `path`.
# NOTE: the name shadows the built-in list type; it is kept unchanged here.
list = []
# List the entries of the root directory
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
# fill a list with each sub directory number of elements
for sub_dir in dirs:
    # os.listdir() returns bare names, so join them to the root first.
    sub_path = os.path.join(path, sub_dir)
    if os.path.isdir(sub_path):
        # Append to the result list; appending to `dirs` would grow the very
        # list being iterated, and list.append() returns None.
        list.append(len(os.listdir(sub_path)))
Adding this line to the code will list out the subdirectory
You were almost there:
import os, sys
# Result: number of entries in each sub-directory of `path`.
# NOTE: the name shadows the built-in list type; it is kept unchanged here.
list = []
# List the entries of the root directory
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
for sub_dir in dirs:
    # os.listdir() returns bare names; join them to the root so the check is
    # not made relative to the current working directory.
    sub_path = os.path.join(path, sub_dir)
    if os.path.isdir(sub_path):
        list.append(len(os.listdir(sub_path)))
As an alternative, you can also utilize glob module for this and other related tasks.
I have created a test directory containing 3 subdirectories l,m and k containing 3 test files each.
import os, glob
list = []
path = "test" # you can leave this "." if you want files in the current directory
for root, dirs, files in os.walk(path, topdown=True):
for name in dirs:
list.append(len(glob.glob(root + '/' + name + '/*')))
Output :
[3, 3, 3]
I have a range of folders which are named like folder0, folder2,..., folder99. Now I want to walk through folder0,..., folderX and print their files. X should stay variable and easy to change.
My code looks something like this, but it's not working the way I want yet because I can't choose the number at which the walk should stop.
import os
import re
rootdir = r'path'
for root, dirs, files in os.walk(rootdir):
for dir in dirs:
if not re.match(r'folder[0-9]+$', dir):
for file in files:
print files
Assuming your name scheme is consistent, which you state, why do the os.walk?
import os
# Template for the folder names; {} is replaced by the folder number.
dir_path = '/path/to/folders/folder{}'
# Number of folders to visit: folder0 .. folder(x - 1).
x = 10
for i in range(0, x):
    formatted_path = dir_path.format(i)
    # Only the listing itself is guarded, so a missing folder is reported
    # and skipped without hiding errors from the code below.
    try:
        entries = os.listdir(formatted_path)
    except OSError:
        print("{} does not exist".format(formatted_path))
        continue
    for f in entries:
        filename = os.path.join(formatted_path, f)
        if os.path.isfile(filename):
            print(filename)
I'm looking for a way to include/exclude files patterns and exclude directories from a os.walk() call.
Here's what I'm doing by now:
import fnmatch
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def _filter(paths):
for path in paths:
if os.path.isdir(path) and not path in excludes:
yield path
for pattern in (includes + excludes):
if not os.path.isdir(path) and fnmatch.fnmatch(path, pattern):
yield path
for root, dirs, files in os.walk('/home/paulo-freitas'):
dirs[:] = _filter(map(lambda d: os.path.join(root, d), dirs))
files[:] = _filter(map(lambda f: os.path.join(root, f), files))
for filename in files:
filename = os.path.join(root, filename)
Is there a better way to do this? How?
This solution uses fnmatch.translate to convert glob patterns to regular expressions (it assumes the includes only is used for files):
import fnmatch
import os
import os.path
import re
includes = ['*.doc', '*.odt'] # for files only
excludes = ['/home/paulo-freitas/Documents'] # for dirs and files
# transform glob patterns to regular expressions
includes = r'|'.join([fnmatch.translate(x) for x in includes])
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'
for root, dirs, files in os.walk('/home/paulo-freitas'):
# exclude dirs
dirs[:] = [os.path.join(root, d) for d in dirs]
dirs[:] = [d for d in dirs if not re.match(excludes, d)]
# exclude/include files
files = [os.path.join(root, f) for f in files]
files = [f for f in files if not re.match(excludes, f)]
files = [f for f in files if re.match(includes, f)]
for fname in files:
print fname
From docs.python.org:
os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]])
When topdown is True, the caller can modify the dirnames list in-place … this can be used to prune the search …
for root, dirs, files in os.walk('/home/paulo-freitas', topdown=True):
# excludes can be done with fnmatch.filter and complementary set,
# but it's more annoying to read.
dirs[:] = [d for d in dirs if d not in excludes]
for pat in includes:
for f in fnmatch.filter(files, pat):
print os.path.join(root, f)
I should point out that the above code assumes excludes is a pattern, not a full path. You would need to adjust the list comprehension to filter if os.path.join(root, d) not in excludes to match the OP case.
why fnmatch?
import os
for ROOT,DIR,FILES in os.walk("/path"):
for file in FILES:
if file.endswith(('doc','odt')):
print file
for directory in DIR:
if not directory in excludes :
print directory
not exhaustively tested
dirtools is perfect for your use-case:
from dirtools import Dir
print(Dir('.', exclude_file='.gitignore').files())
Here is one way to do that
import fnmatch
import os
excludes = ['/home/paulo-freitas/Documents']
matches = []
for path, dirs, files in os.walk(os.getcwd()):
for eachpath in excludes:
if eachpath in path:
for result in [os.path.abspath(os.path.join(path, filename)) for
filename in files if fnmatch.fnmatch(filename,'*.doc') or fnmatch.fnmatch(filename,'*.odt')]:
print matches
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def file_search(path, exe):
    """Print the full path of every file below path whose name ends with exe.

    path is walked recursively. exe is compared against the end of each file
    name, so extensions of any length (".doc", ".jpeg", ...) can be used.
    """
    for x, y, z in os.walk(path):
        for a in z:
            # endswith() is not limited to four-character extensions the way
            # a fixed-width slice comparison is.
            if a.endswith(exe):
                print(os.path.join(x, a))
for x in includes:
This is an example of excluding directories and files with os.walk():
def copyTree(src, dest, onerror=None):
src = os.path.abspath(src)
src_prefix = len(src) + len(os.path.sep)
for root, dirs, files in os.walk(src, onerror=onerror):
for pattern in ignoreDirPatterns:
if pattern in root:
#If the above break didn't work, this part will be executed
for file in files:
for pattern in ignoreFilePatterns:
if pattern in file:
#If the above break didn't work, this part will be executed
dirpath = os.path.join(dest, root[src_prefix:])
except OSError as e:
if onerror is not None:
continue;#If the above else didn't executed, this will be reached
continue;#If the above else didn't executed, this will be reached
python >=3.2 due to exist_ok in makedirs
The above methods did not work for me.
So this is what I came up with, as an expansion of my original answer to another question.
What worked for me was:
if (not (str(root) + '/').startswith(tuple(exclude_foldr)))
which compiled a path and excluded the tuple of my listed folders.
This gave me the exact result I was looking for.
My goal for this was to keep my mac organized.
I can search any folder by path, locate and move specific file types, ignore subfolders, and prompt the user up front to ask whether they want to move the files.
NOTE: the Prompt is only one time per run and is NOT per file
By Default the prompt defaults to NO when you hit enter instead of [y/N], and will just list the Potential files to be moved.
This is only a snippet of my GitHub Please visit for the total script.
HINT: Read the script below as I added info per line as to what I had done.
#!/usr/bin/env python3
# =============================================================================
# Created On : MAC OSX High Sierra 10.13.6 (17G65)
# Created On : Python 3.7.0
# Created By : Jeromie Kirchoff
# =============================================================================
# =============================================================================
from os import walk
from os import path
from shutil import move
import getpass
import click
mac_username = getpass.getuser()
includes_file_extensn = ([".jpg", ".gif", ".png", ".jpeg", ])
search_dir = path.dirname('/Users/' + mac_username + '/Documents/')
target_foldr = path.dirname('/Users/' + mac_username + '/Pictures/Archive/')
exclude_foldr = set([target_foldr,
path.dirname('/Users/' + mac_username +
path.dirname('/Users/' + mac_username +
path.dirname('/Users/' + mac_username +
if click.confirm("Would you like to move files?",
question_moving = True
question_moving = False
def organize_files():
# topdown=True required for filtering.
# "Root" had all info i needed to filter folders not dir...
for root, dir, files in walk(search_dir, topdown=True):
for file in files:
# creating a directory to str and excluding folders that start with
if (not (str(root) + '/').startswith(tuple(exclude_foldr))):
# showcase only the file types looking for
if (file.endswith(tuple(includes_file_extensn))):
# using path.normpath as i found an issue with double //
# in file paths.
filetomove = path.normpath(str(root) + '/' +
# forward slash required for both to split
movingfileto = path.normpath(str(target_foldr) + '/' +
# Answering "NO" this only prints the files "TO BE Moved"
print('Files To Move: ' + str(filetomove))
# This is using the prompt you answered at the beginning
if question_moving is True:
print('Moving File: ' + str(filetomove) +
"\n To:" + str(movingfileto))
# This is the command that moves the file
move(filetomove, movingfileto)
# The rest is ignoring explicitly and continuing
if __name__ == '__main__':
Example of running my script from terminal:
$ python3 organize_files.py
Exclude list: {'/Users/jkirchoff/Pictures/Archive', '/Users/jkirchoff/Documents/Stupid_Folder', '/Users/jkirchoff/Documents/Random', '/Users/jkirchoff/Documents/GitHub'}
Files found will be moved to this folder:/Users/jkirchoff/Pictures/Archive
Would you like to move files?
No? This will just list the files.
Yes? This will Move your files to the target folder.
Example of listing files:
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
Example of moving files:
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
To: /Users/jkirchoff/Pictures/Archive/1.custom-award-768x512.jpg
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
To: /Users/jkirchoff/Pictures/Archive/10351458_318162838331056_9023492155204267542_n.jpg