I'm trying to organize some data before processing it.
What I have is a folder of raw tiff files (they're raster bands from a drone sensor).
I want to move these files into new, individual folders. e.g., IMG_001_1, IMG_001_2, IMG_001_3, IMG_001_4 and IMG_001_5 are all moved into a new folder titled IMG_001. I am ok with changing the naming structure of the files in order to make the code simpler.
An additional issue is that there are a few images missing from the folder. The current files are IMG0016 - IMG0054 (no IMG0055), IMG0056 - IMG0086 (no IMG0087), and IMG0087 - IMG0161. This is why I think it would be simpler to just rename the new image folders from 1-143.
My main problem is actually moving the files into the new folders - creating the folders is fairly simple.
Played around a little and this script came out, which should do what you want:
import os
import shutil
import re
# Source and destination roots. Every backslash is doubled so that sequences
# such as "\U" (in "\Unorganized_Images") are not read as escape sequences,
# which is a SyntaxError on Python 3. The trailing separator of the original
# values is preserved.
UNORG = "C:\\Users\\joshuarb\\Desktop\\Unorganized_Images\\"
ORG = "C:\\Users\\joshuarb\\Desktop\\Organized_Images\\"
def main():
    """Move the images in UNORG into per-image folders under ORG.

    Folders IMG_0000 .. IMG_0142 are created under ORG, and the files that
    get_files_to_move() selects for each index are moved into the matching
    folder.
    """
    file_names = [os.path.join(UNORG, name) for name in get_files_of(UNORG)]
    for count in range(143):
        current_dir = "{}IMG_{:04d}".format(ORG, count)
        # Tolerate a folder left over from an earlier run instead of
        # crashing on the first index.
        if not os.path.isdir(current_dir):
            os.makedirs(current_dir)
        move_files = get_files_to_move(file_names, count)
        # Parenthesised so the line is valid on Python 3 as well as Python 2.
        print(move_files)
        for src in move_files:
            shutil.move(src, os.path.join(current_dir, os.path.basename(src)))
def get_files_to_move(file_names, count):
    """Return the paths in file_names that belong to image number count.

    Only the file name is inspected, never the directories leading to it.
    The number may be zero-padded and may be separated from "IMG" by an
    underscore, so for count=16 all of IMG16_1, IMG0016_1 and IMG_0016_1
    are selected, while IMG0116_1 is not.
    """
    # "0*" absorbs any zero padding; the trailing "_" keeps 16 from
    # matching 160 or 161.
    pattern = re.compile(r'IMG_?0*{}_'.format(count))
    return [
        name for name in file_names
        if pattern.search(os.path.basename(name))
    ]
def get_files_of(mypath):
    """Return the names of the files directly inside mypath (no recursion).

    An empty list is returned when mypath does not exist.
    """
    # next(iterator, default) works on Python 2 and 3, unlike the Python 2
    # only generator.next(); the default covers os.walk yielding nothing
    # for a missing directory.
    _dirpath, _dirnames, filenames = next(os.walk(mypath), (mypath, [], []))
    return filenames
if __name__ == '__main__':
main()
As you see, the code is not commented. But feel free to ask if something is unclear;)
Problem solved!
import os
import shutil
# Raw strings keep the Windows backslashes literal; without the r prefix the
# "\U" in "\Users" is an invalid unicode escape on Python 3.
srcpath = r"C:\Users\joshuarb\Desktop\Python_Test\UnorganizedImages"
srcfiles = os.listdir(srcpath)
destpath = r"C:\Users\joshuarb\Desktop\Python_Test\OrganizedImages"
# The first eight characters of each filename name its destination folder;
# the set removes duplicates.
destdirs = list({filename[0:8] for filename in srcfiles})
def create(dirname, destpath):
    """Make sure the folder dirname exists inside destpath.

    Returns the full path of the folder. A folder that already exists (for
    example from an earlier run) is reused instead of raising.
    """
    full_path = os.path.join(destpath, dirname)
    if not os.path.isdir(full_path):
        os.mkdir(full_path)
    return full_path
def move(filename, dirpath):
    """Move the file named filename from srcpath to dirpath."""
    source = os.path.join(srcpath, filename)
    shutil.move(source, dirpath)
# create destination directories and store their names along with full paths
targets = [
    (folder, create(folder, destpath)) for folder in destdirs
]
# Look each file's folder up by its eight-character prefix instead of
# rescanning the whole file list once per folder. Every prefix is present
# because destdirs was built from these same filenames.
path_by_prefix = dict(targets)
for filename in srcfiles:
    move(filename, path_by_prefix[filename[0:8]])
Related
I have a folder whose directory structure looks somewhat like this:
C:/Documents/A350/a/1.png
/2.png
b/1.png
/B777/a/1.png
/B747/a/1.png
/2.png
b/1.png
c/1.png
d/1.png
/2.png
I want to move all png to the main folder i.e. Documents.
def recur(input_path):
    """Return the most deeply nested directory found beneath input_path.

    Every sub-directory is explored, not just the first one listed. When
    several directories share the greatest depth, the one that comes first
    in sorted order wins, so the result does not depend on listing order.
    input_path itself is returned when it has no sub-directories.
    """
    return _deepest(input_path, 0)[1]


def _deepest(path, depth):
    """Return (depth, directory) for the deepest directory at or below path."""
    best = (depth, path)
    for entry in sorted(os.listdir(path)):
        child = os.path.join(path, entry)
        if os.path.isdir(child):
            candidate = _deepest(child, depth + 1)
            # Strictly greater keeps the earliest directory on a tie.
            if candidate[0] > best[0]:
                best = candidate
    return best
I have some code to get the deepest path inside a folder, but I am not sure how to use the recursive function to achieve what I want.
Any help would be really appreciated, thanks!!
The program below collects all files recursively from the parent directory and copies them into the parent directory.
import os
import glob
import shutil
files_abs_paths = []


def get_all_files(parent_dir):
    """Collect the path of every file under parent_dir, recursing into folders.

    Each path is appended to the module-level files_abs_paths list, so code
    that reads that list keeps working, and the paths found by this call are
    also returned.
    """
    found = []
    # glob.escape keeps characters such as "[" in a folder name from being
    # interpreted as glob syntax. A single "*" lists one level; the
    # recursion below supplies the depth.
    for entry in glob.glob(f'{glob.escape(parent_dir)}/*'):
        if os.path.isdir(entry):
            found.extend(get_all_files(entry))
        else:
            files_abs_paths.append(entry)
            found.append(entry)
    return found
# Folder that is searched and that also receives the copies.
parent_dir = r"C:/Documents"
# get all files recursively in parent dir
get_all_files(parent_dir)
# copies files to parent_dir
for fl in files_abs_paths:
    file_name = os.path.basename(fl)
    new_file_loc = f'{parent_dir}/{file_name}'
    # Never overwrite: when the name is already taken in parent_dir
    # (including a file that already lives there) the copy is skipped.
    if not os.path.exists(new_file_loc):
        shutil.copyfile(fl, new_file_loc)
You can also get all the files from a folder tree using os.walk:
If you don't mind overwriting files with duplicate names:
from os import walk, rename
from os.path import join
def collect_files(root):
    """Move every file found below root into root itself.

    A file in root that has the same name as a moved file is overwritten.
    """
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename raises on Windows when the destination exists.
    from os import replace

    for src_path, _, files in walk(root):
        if src_path == root:
            continue  # files already in root stay where they are
        for name in files:
            replace(join(src_path, name), join(root, name))
If you want to add a number to the end of files with duplicate names:
from os import walk, rename
from os.path import join, splitext, exists
def collect_files(root):
    """Move every file found below root into root without overwriting.

    When the name is already taken in root, a three-digit counter is
    inserted before the extension (name_001.ext, name_002.ext, ...) until
    an unused name is found.
    """
    for folder, _, names in walk(root):
        if folder == root:
            continue
        for name in names:
            stem, ext = splitext(name)
            candidate = name
            counter = 1
            while exists(join(root, candidate)):
                candidate = '{}_{:0>3}{}'.format(stem, counter, ext)
                counter += 1
            rename(join(folder, name), join(root, candidate))
The file location is /Users/D/Desktop/Files/1/no.h5, and the same filename (no.h5) appears in each of the folders 1-400. I want to collect all of these files in a single folder, with each file renamed to the number of the folder it came from.
Your help is greatly appreciated!
You could use a glob pattern to find the target files to do the move. The pathlib library provides a convenient way to manage the paths.
from pathlib import Path
# No leading space in the path: " /Users/..." would be treated as a relative
# path whose first component starts with a space.
base = Path("/Users/D/Desktop/Files")
target_folder = Path("/some/where/else")
# Each matching file is moved into target_folder and named after the
# folder it came from; folders whose name is not all digits are ignored.
for target in base.glob("*/no.h5"):
    name = target.parent.name
    if name.isdigit():
        target.rename(target_folder / (name + ".h5"))
You can use the shutil module (https://docs.python.org/3/library/shutil.html):
import shutil
...
shutil.copyfile(src_fname, os.path.join(target_dir, new_fname))
You can also walk through your base directory to create a list of files that need to be copied:
import os
# A plain raw string: the f prefix had no placeholders to fill in.
dir_base = r'\Users\D\Desktop\Files'
fname_base = 'no'
fname_suffix = 'h5'
list_files = []
for subdir, dirs, files in os.walk(dir_base):
    for file in files:
        # Require the dot so that only a real ".h5" extension matches,
        # not any name that merely ends in the letters "h5".
        if file.startswith(fname_base) and file.endswith('.' + fname_suffix):
            list_files.append(os.path.join(subdir, file))
The whole code for an example I created on my system looks like this:
import shutil
import os
# dir_base = r'\Users\D\Desktop\Files'   # base directory for your case
dir_base = './test'                      # base dir for my example
fname_base = 'no'
# fname_suffix = 'h5'                    # extension of your files
fname_suffix = 'txt'                     # extension of files in my example
target_base_fname = 'abc'                # base filename for new files in my example
target_dir = './test/tgt'                # target dir for my example

# shutil.copyfile fails when the destination folder is missing, so make
# sure it exists before copying anything.
os.makedirs(target_dir, exist_ok=True)

list_files = []
for subdir, dirs, files in os.walk(dir_base):
    for file in files:
        # The dot is required so only a real extension matches.
        if file.startswith(fname_base) and file.endswith('.' + fname_suffix):
            list_files.append(os.path.join(subdir, file))

for elem in list_files:
    print(f'processing: {elem}')
    # The name of the folder holding the file becomes part of the new name.
    this_dir_path = os.path.dirname(elem)
    this_dir_name = os.path.basename(this_dir_path)
    new_fname = target_base_fname + '_' + this_dir_name + '.' + fname_suffix
    shutil.copyfile(elem, os.path.join(target_dir, new_fname))
I have a main directory (root) which contains 6 subdirectories.
I would like to count the number of files present in each subdirectory and add all the counts to a simple Python list.
For this result : mylist = [497643, 5976, 3698, 12, 456, 745]
I'm blocked on that code:
import os, sys
list = []
# Open a file
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
print (file)
#fill a list with each sub directory number of elements
for sub_dir in dirs:
list = dirs.append(len(sub_dir))
My attempt at filling the list doesn't work, and this is honestly the best I could manage...
Finding a way to iterate sub-directory of a main directory and fill a list with a function applied on each sub directory would sky rocket the speed of my actual data science project!
Thanks for your help
Abel
You can use os.path.isfile and os.path.isdir
# For every sub-directory of path, count the entries that are regular files.
# isfile() is given the full path, and its True results are summed; taking
# len() of the mapped list would count every entry, directories included.
res = [
    sum(
        os.path.isfile(os.path.join(path, name, entry))
        for entry in os.listdir(os.path.join(path, name))
    )
    for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
]
print(res)
Using the for loop
res = []
for name in os.listdir(path):
    dir_path = os.path.join(path, name)
    if os.path.isdir(dir_path):
        # Count only regular files; isfile() needs the full path of each
        # entry, and summing the booleans counts the True results.
        res.append(sum(
            os.path.isfile(os.path.join(dir_path, entry))
            for entry in os.listdir(dir_path)
        ))
You need to use os.listdir on each subdirectory. The current code simply takes the length of a filepath.
import os, sys
# NOTE: this name shadows the built-in list type; it is kept because it is
# the variable name used in the question.
list = []
# Directory whose sub-directories are counted
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
# fill a list with each sub directory number of elements
for sub_dir in dirs:
    sub_path = os.path.join(path, sub_dir)
    # os.listdir returns bare names, so join them with path first, and
    # skip plain files, which cannot be listed.
    if not os.path.isdir(sub_path):
        continue
    temp = os.listdir(sub_path)
    # Append to the result list; appending to dirs would grow the very
    # list being iterated, and append() returns None.
    list.append(len(temp))
Adding this line to the code will list out the subdirectory
You were almost there:
import os, sys
# NOTE: this name shadows the built-in list type; it is kept because it is
# the variable name used in the question.
list = []
# Directory whose sub-directories are counted
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
for sub_dir in dirs:
    sub_path = os.path.join(path, sub_dir)
    # Test the joined path: a bare name would be resolved against the
    # current working directory instead of path.
    if os.path.isdir(sub_path):
        list.append(len(os.listdir(sub_path)))
print(list)
As an alternative, you can also utilize glob module for this and other related tasks.
I have created a test directory containing 3 subdirectories l,m and k containing 3 test files each.
import os, glob
# Number of glob matches inside each directory that os.walk reports.
list = []
path = "test" # you can leave this "." if you want files in the current directory
for root, dirs, files in os.walk(path, topdown=True):
    list.extend(len(glob.glob(root + '/' + name + '/*')) for name in dirs)
print(list)
Output :
[3, 3, 3]
I have a range of folders which are named like folder0, folder2,..., folder99. Now I want to walk through folder0,..., folderX and print their files. X should stay variable and easy to change.
My code looks something like this, but it's not working the way I want yet because I can't choose the number at which to stop.
import os
import re
rootdir = r'path'
for root, dirs, files in os.walk(rootdir):
for dir in dirs:
if not re.match(r'folder[0-9]+$', dir):
dirs.remove(dir)
for file in files:
print files
Assuming your name scheme is consistent, which you state, why do the os.walk?
import os
dir_path = '/path/to/folders/folder{}'
x = 10  # highest folder number to visit; change as needed
# range(x + 1) so that folderX itself is visited, as the question asks.
for i in range(x + 1):
    formatted_path = dir_path.format(i)
    # Only the listing is guarded, so an error raised while handling an
    # entry is not misreported as a missing folder.
    try:
        entries = os.listdir(formatted_path)
    except OSError:
        print("{} does not exist".format(formatted_path))
        continue
    for f in entries:
        filename = os.path.join(formatted_path, f)
        if os.path.isfile(filename):
            print(filename)
I'm looking for a way to include/exclude files patterns and exclude directories from a os.walk() call.
Here's what I'm doing by now:
import fnmatch
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def _filter(paths):
    """Yield the paths that pass the module-level includes/excludes.

    A directory is yielded unless it is listed in excludes. Any other path
    is yielded at most once, and only when it matches at least one includes
    pattern and no excludes pattern.
    """
    for path in paths:
        if os.path.isdir(path):
            if path not in excludes:
                yield path
        elif (any(fnmatch.fnmatch(path, pattern) for pattern in includes)
              and not any(fnmatch.fnmatch(path, pattern)
                          for pattern in excludes)):
            yield path
for root, dirs, files in os.walk('/home/paulo-freitas'):
dirs[:] = _filter(map(lambda d: os.path.join(root, d), dirs))
files[:] = _filter(map(lambda f: os.path.join(root, f), files))
for filename in files:
filename = os.path.join(root, filename)
print(filename)
Is there a better way to do this? How?
This solution uses fnmatch.translate to convert glob patterns to regular expressions (it assumes the includes only is used for files):
import fnmatch
import os
import os.path
import re
includes = ['*.doc', '*.odt']  # for files only
excludes = ['/home/paulo-freitas/Documents']  # for dirs and files

# transform glob patterns to regular expressions
includes = r'|'.join([fnmatch.translate(x) for x in includes])
# r'$.' can never match, so an empty excludes list excludes nothing
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'
# compile once up front rather than handing the pattern text to re.match
# for every single path
include_re = re.compile(includes)
exclude_re = re.compile(excludes)

for root, dirs, files in os.walk('/home/paulo-freitas'):
    # exclude dirs; assigning to dirs[:] prunes the walk in place
    dirs[:] = [os.path.join(root, d) for d in dirs]
    dirs[:] = [d for d in dirs if not exclude_re.match(d)]

    # exclude/include files
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if not exclude_re.match(f)]
    files = [f for f in files if include_re.match(f)]

    for fname in files:
        print(fname)
From docs.python.org:
os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]])
When topdown is True, the caller can modify the dirnames list in-place … this can be used to prune the search …
for root, dirs, files in os.walk('/home/paulo-freitas', topdown=True):
    # excludes can be done with fnmatch.filter and complementary set,
    # but it's more annoying to read.
    # Assigning to dirs[:] prunes the walk in place.
    dirs[:] = [d for d in dirs if d not in excludes]
    for pat in includes:
        for f in fnmatch.filter(files, pat):
            # print() call so the snippet also runs on Python 3
            print(os.path.join(root, f))
I should point out that the above code assumes excludes is a pattern, not a full path. You would need to adjust the list comprehension to filter if os.path.join(root, d) not in excludes to match the OP case.
why fnmatch?
import os
excludes=....
for ROOT,DIR,FILES in os.walk("/path"):
for file in FILES:
if file.endswith(('doc','odt')):
print file
for directory in DIR:
if not directory in excludes :
print directory
not exhaustively tested
dirtools is perfect for your use-case:
from dirtools import Dir
print(Dir('.', exclude_file='.gitignore').files())
Here is one way to do that
import fnmatch
import os
excludes = ['/home/paulo-freitas/Documents']
matches = []
for path, dirs, files in os.walk(os.getcwd()):
    # Skip the directory as soon as *any* excluded path occurs in it.
    # Testing the excludes one at a time would add the same files once per
    # non-matching entry and would not really skip anything.
    if any(excluded in path for excluded in excludes):
        continue
    matches.extend(
        os.path.abspath(os.path.join(path, filename))
        for filename in files
        if fnmatch.fnmatch(filename, '*.doc') or fnmatch.fnmatch(filename, '*.odt')
    )
print(matches)
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def file_search(path, exe):
    """Print and return the files below path whose names match exe.

    exe may be a glob pattern such as '*.doc' or a plain suffix such as
    '.doc'. Comparing only the last four characters of the name, as before,
    could never match a five-character pattern like '*.doc'.

    Returns the list of matching paths, in the order they were printed.
    """
    import fnmatch  # local import: the surrounding snippet only imports os

    found = []
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            if fnmatch.fnmatch(name, exe) or (exe and name.endswith(exe)):
                full_path = os.path.join(folder, name)
                print(full_path)
                found.append(full_path)
    return found
for x in includes:
file_search(excludes[0],x)
This is an example of excluding directories and files with os.walk():
ignoreDirPatterns = [".git"]
ignoreFilePatterns = [".php"]


def copyTree(src, dest, onerror=None):
    """Copy the tree at src into dest, skipping ignored directories and files.

    A directory is skipped when any entry of ignoreDirPatterns occurs in its
    path, and a file when any entry of ignoreFilePatterns occurs in its name
    (plain substring tests). Destination directories are created on demand,
    so a directory that contributes no files is not created.

    Args:
        src: root of the tree to copy.
        dest: directory that receives the copy.
        onerror: optional callable taking an OSError. It is handed to
            os.walk and is also called when a destination directory cannot
            be created; the affected file is then skipped.

    Raises:
        OSError: when a destination directory cannot be created and no
            onerror callable was given.
    """
    src = os.path.abspath(src)
    src_prefix = len(src) + len(os.path.sep)
    for root, dirs, files in os.walk(src, onerror=onerror):
        if any(pattern in root for pattern in ignoreDirPatterns):
            continue
        dirpath = os.path.join(dest, root[src_prefix:])
        for file in files:
            if any(pattern in file for pattern in ignoreFilePatterns):
                continue
            try:
                os.makedirs(dirpath, exist_ok=True)
            except OSError as e:
                # Do not fall through to the copy: with the directory
                # missing, shutil.copy would treat dirpath as a file name.
                if onerror is None:
                    raise
                onerror(e)
                continue
            shutil.copy(os.path.join(root, file), dirpath)
Requires Python >= 3.2 because of the exist_ok argument to os.makedirs.
The methods above did not work for me.
So this is what I came up with, an expansion of my original answer to another question.
What worked for me was:
if (not (str(root) + '/').startswith(tuple(exclude_foldr)))
which compiled a path and excluded the tuple of my listed folders.
This gave me the exact result I was looking for.
My goal for this was to keep my mac organized.
I can search any folder by path, locate and move specific file types, ignore subfolders, and prompt the user up front about whether they want to move the files.
NOTE: the Prompt is only one time per run and is NOT per file
By default the prompt answers NO when you just hit Enter at [y/N]; the script then only lists the files that would be moved.
This is only a snippet of my GitHub Please visit for the total script.
HINT: Read the script below as I added info per line as to what I had done.
#!/usr/bin/env python3
# =============================================================================
# Created On : MAC OSX High Sierra 10.13.6 (17G65)
# Created On : Python 3.7.0
# Created By : Jeromie Kirchoff
# =============================================================================
"""THE MODULE HAS BEEN BUILD FOR KEEPING YOUR FILES ORGANIZED."""
# =============================================================================
from os import walk
from os import path
from shutil import move
import getpass
import click
mac_username = getpass.getuser()
# File-name endings that mark a file as one to move.
includes_file_extensn = [".jpg", ".gif", ".png", ".jpeg"]
# path.dirname() on a path that ends in "/" simply drops that final slash.
search_dir = path.dirname('/Users/' + mac_username + '/Documents/')
target_foldr = path.dirname('/Users/' + mac_username + '/Pictures/Archive/')
# Folders that are never searched; the target folder is one of them.
exclude_foldr = {
    target_foldr,
    path.dirname('/Users/' + mac_username + '/Documents/GitHub/'),
    path.dirname('/Users/' + mac_username + '/Documents/Random/'),
    path.dirname('/Users/' + mac_username + '/Documents/Stupid_Folder/'),
}
# Asked once, here, not once per file. default=False is what the prompt
# uses when the user just presses Enter.
question_moving = click.confirm("Would you like to move files?",
                                default=False)
def organize_files():
    """Move (or just list) matching files from search_dir into target_foldr.

    Walks search_dir, skips every directory that is, or lies inside, one of
    the exclude_foldr entries, and handles each file whose name ends with
    one of includes_file_extensn. Every candidate is printed; it is only
    moved when question_moving is True.
    """
    # A trailing slash on both sides makes this a whole-component prefix
    # test, so excluding ".../Random" does not also exclude ".../Random2".
    excluded_prefixes = tuple(str(folder) + '/' for folder in exclude_foldr)
    wanted_suffixes = tuple(includes_file_extensn)
    for root, _dirs, files in walk(search_dir, topdown=True):
        # Exclusion depends only on the directory, so test it once per
        # directory instead of once per file.
        if (str(root) + '/').startswith(excluded_prefixes):
            continue
        for file in files:
            if not file.endswith(wanted_suffixes):
                continue
            # normpath collapses any doubled "/" in the joined paths.
            filetomove = path.normpath(str(root) + '/' + str(file))
            movingfileto = path.normpath(str(target_foldr) + '/' + str(file))
            # Printed in both modes; answering "no" stops here.
            print('Files To Move: ' + str(filetomove))
            if question_moving is True:
                print('Moving File: ' + str(filetomove) +
                      "\n To:" + str(movingfileto))
                move(filetomove, movingfileto)
if __name__ == '__main__':
organize_files()
Example of running my script from terminal:
$ python3 organize_files.py
Exclude list: {'/Users/jkirchoff/Pictures/Archive', '/Users/jkirchoff/Documents/Stupid_Folder', '/Users/jkirchoff/Documents/Random', '/Users/jkirchoff/Documents/GitHub'}
Files found will be moved to this folder:/Users/jkirchoff/Pictures/Archive
Would you like to move files?
No? This will just list the files.
Yes? This will Move your files to the target folder.
[y/N]:
Example of listing files:
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
...etc
Example of moving files:
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
To: /Users/jkirchoff/Pictures/Archive/1.custom-award-768x512.jpg
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
To: /Users/jkirchoff/Pictures/Archive/10351458_318162838331056_9023492155204267542_n.jpg
...