How to remove part of a tree but keep the files and directories in python?
I have paths like this:
r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\A\file1.txt"
r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\B\C\file2.txt"
r"C:\User\Desktop\g1sr56g41f2d3s1gf\file3.txt"
r"C:\User\Desktop\F2F31DS5FDSF1S2F3DS2F1D23\file4.txt"
r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\B\C\file5.txt"
r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\D\E\file6.txt"
I want to move them to:
r"C:\User\Desktop\Document\A\file1.txt"
r"C:\User\Desktop\Document\B\C\file2.txt"
r"C:\User\Desktop\file3.txt"
r"C:\User\Desktop\file4.txt"
r"C:\User\Desktop\Document\B\C\file5.txt"
r"C:\User\Desktop\Document\D\E\file6.txt"
SOme simply dirty way to do it
import os
paths = r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\D\E\file6.txt"
path = paths.replace(os.sep, '/')
l=(list(path.split("/")))
trim = l[3]
print(trim)
final_path = path.replace("/"+trim,'')
final_path = final_path.replace('/', os.sep)
print(final_path)
output
C:\User\Desktop\Document\D\E\file6.txt
Solution 2
import os
import re
paths = r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\D\E\file6.txt"
path = paths.replace(os.sep, '/')
l=(list(path.split("/")))
del l[3]
final_path = os.sep.join(l)
print(final_path)
output
C:\User\Desktop\Document\D\E\file6.txt
Here is my code:
#!/usr/bin/python3
import os, shutil
DST = 'Desktop'
toDel = []
for folder_name in os.listdir(DST):
folder = os.path.join(DST, folder_name)
if not os.path.isdir(folder):
continue
for path, _, files in os.walk(folder):
relpath = os.path.join(DST, os.path.relpath(path, folder))
for file in files:
search = os.path.join(path, file)
destination = os.path.join(relpath, file)
if not os.path.exists(relpath):
os.mkdir(relpath)
os.replace(search, destination)
if len(toDel) == 0 or not path.startswith(toDel[-1] + os.sep):
toDel.append(path)
for folder in toDel:
shutil.rmtree(folder)
Related
I have a folder directories look somewhat like this:
C:/Documents/A350/a/1.png
/2.png
b/1.png
/B777/a/1.png
/B747/a/1.png
/2.png
b/1.png
c/1.png
d/1.png
/2.png
I want to move all png to the main folder i.e. Documents.
def recur(input_path):
dir_list = os.listdir(input_path)
for directory in dir_list:
path_name = os.path.join(input_path, directory)
p = pathlib.Path(path_name)
if p.is_dir():
input_path = path_name
return recur(input_path)
return input_path
I have some code to get the deepest path inside a folder, but i am not so sure how to use the recursive function to achieve what i wanted.
Any help would be really appreciated, thanks!!
Below program get all files recursively from parent directory and copies files to parent directory.
import os
import glob
import shutil
files_abs_paths = []
def get_all_files(parent_dir):
files_n_folders = glob.glob(f'{parent_dir}/**')
for fl_or_fldr in files_n_folders:
if os.path.isdir(fl_or_fldr):
folder = fl_or_fldr
get_all_files(folder)
else:
file = fl_or_fldr
files_abs_paths.append(file)
parent_dir = r"C:'/Documents"
# get all files recursively in parent dir
get_all_files(parent_dir)
# copies files to parent_dir
for fl in files_abs_paths:
# gets file_name
file_name = os.path.basename(fl)
# create file in parent_dir
new_file_loc = f'{parent_dir}/{file_name}'
if os.path.exists(new_file_loc) is False:
shutil.copyfile(fl, new_file_loc)
You can also get all the files from a folder tree using os.walk:
If you don't mind overwriting files with duplicate names:
from os import walk, rename
from os.path import join
def collect_files(root):
for src_path, _, files in walk(root):
if src_path != root:
for name in files:
rename(join(src_path, name), join(root, name))
If you want to add a number to the end of files with duplicate names:
from os import walk, rename
from os.path import join, splitext, exists
def collect_files(root):
for src_path, _, files in walk(root):
if src_path != root:
for name in files:
dst_name = name
dst_name_parts = splitext(dst_name)
file_num = 1
while exists(join(root, dst_name)):
dst_name = '{}_{:0>3}{}'.format(dst_name_parts[0], file_num, dst_name_parts[1])
file_num += 1
rename(join(src_path, name), join(root, dst_name))
I am trying to find duplicates between two folders and copy only unique image files to the 'dest' folder. I can copy all the non-dupes using the code below, however it doesn't maintain the source directory structure. I think OS.walk returns 3 tuples, but they aren't linked so not sure how to re-construct the sub dir?
Example:
import shutil, os
from difPy import dif
source = input('Input source folder:')
dest = input('Input backup \ destination folder:')
ext = ('.jpg','.jpeg','.gif','.JPG','.JPEG','.GIF')
search = dif(source, dest)
result = search.result
result
dupes = []
srcfiles = []
filecount = []
failed = []
removed = []
for i in result.values():
dupes.append(i['location'])
for dirpath, subdirs, files in os.walk(source):
for x in files:
if x.endswith(ext):
srcfiles.append(os.path.join(dirpath, x))
for f in srcfiles:
if f not in dupes:
shutil.copy(f, dest)
print('File copied successfully - '+f)
filecount.append(f)
else:
print('File not copied successfully !!!! - '+f)
failed.append(f)
I have also tried using the shutil.copytree function with an ignore list, however it requires a new folder and can't get the ignore list function to work
shutil.copytree example:
for i in result.values():
df = []
df.append(i['filename'])
def ignorelist(source, df):
return [f for f in df if os.path.isfile(os.path.join(source, f))]
shutil.copytree(source, destnew, ignore=ignorelist)
This function ignorelist should do the trick:
import shutil, os
from difPy import dif
source = input('Input source folder:')
dest = input('Input backup \ destination folder:')
ext = ('.jpg','.jpeg','.gif')
search = dif(source, dest)
dupes = [value['location'] for value in search.result.values()]
def ignorelist(source, files):
return [file for file in files
if (os.path.isfile(os.path.join(source, file))
and (os.path.join(source, file) in dupes
or not file.lower().endswith(ext)))]
shutil.copytree(source, dest, ignore=ignorelist)
And the other "more manual" way would be
import shutil, os
from difPy import dif
source = input('Input source folder:').rstrip('/\\')
dest = input('Input backup \ destination folder:').rstrip('/\\')
ext = ('.jpg','.jpeg','.gif')
search = dif(source, dest)
dupes = [value['location'] for value in search.result.values()]
srcfiles = []
copied = []
failed = []
skipped = []
for dirpath, subdirs, files in os.walk(source):
for file in files:
if file.lower().endswith(ext):
srcfile = os.path.join(dirpath,file)
srcfiles.append(srcfile)
if srcfile in dupes:
print('File not copied (duplicate) - '+srcfile)
skipped.append(srcfile)
else:
try:
destfile = os.path.join(dest,srcfile[len(source)+1:])
os.makedirs(os.path.dirname(destfile), exist_ok=True)
shutil.copy(srcfile,destfile)
print('File copied successfully - '+srcfile)
copied.append(srcfile)
except Exception as err:
print('File not copied (error %s) - %s' % (str(err),srcfile))
failed.append(f)
I have changed some variable names to make them more descriptive. And what you call failed is really just a list of files that are not copied because they are duplicates rather than files whose copying was attempted but failed.
import shutil, os
from difPy import dif
source = input('Input source folder: ')
dest = input('Input backup \ destination folder: ')
# Remove trailing path separators if they exist:
if source.endswith(('/', '\\')):
source = source[:-1]
if dest.endswith(('/', '\\')):
dest = dest[:-1]
# Use the correct path separator to
# ensure correct matching with dif results:
if os.sep == '/':
source = source.replace('\\', os.sep)
elif os.sep == '\\':
source = source.replace('/', os.sep)
source_directory_length = len(source) + 1
ext = ('.jpg','.jpeg','.gif','.JPG','.JPEG','.GIF')
search = dif(source, dest)
result = search.result
# Set comprehension:
dupes = {duplicate['location'] for duplicate in result.values()}
copied = []
not_copied = []
for dirpath, subdirs, files in os.walk(source):
for file in files:
if file.endswith(ext):
source_path = os.path.join(dirpath, file)
if source_path not in dupes:
# get subdirectory of source directory that this file is in:
file_length = len(file) + 1
# Get subdirectory relative to the source directory:
subdirectory = source_path[source_directory_length:-file_length]
if subdirectory:
dest_directory = os.path.join(dest, subdirectory)
# ensure directory exists:
os.makedirs(dest_directory, exist_ok=True)
else:
dest_directory = dest
dest_path = os.path.join(dest_directory, file)
shutil.copy(source_path, dest_path)
print('File copied successfully -', source_path)
copied.append(source_path)
else:
print('File not copied -', source_path)
not_copied.append(source_path)
I have a directory that consists of other directories. Each of those sub-directories have files that I need the absolute path for. For example, let's say the parent directory is /home/Documents and each of the sub-directories is 1, 2,..., 10. I have tried something like files = [os.path.abspath(f) for d in os.listdir('/home/Documents') for f in os.listdir(d)], but that gets me something like (for a file) /home/Documents/file1, when it should be /home/Documents/1/file1. Is there a way to do this with the sub-directory in there?
Yes. You can try os.walk.
Consider the following path which has 3 sub directories: '1', '2', '3'.
- '1' has a file ("123.txt")
- '2' is empty
- '3' has 2 files ("123.txt", "1234.txt")
path = r"C:\Users\hvasala\Documents\Udemy Course\project\del"
import os
for dirname, _, filenames in os.walk(path):
for filename in filenames:
print(os.path.join(dirname, filename))
Output:
C:\Users\hvasala\Documents\Udemy Course\project\del\1\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\1234.txt
Use os.path.join:
root = '/tmp/project'
files = [os.path.join(root, d, f) for d in os.listdir(root) for f in os.listdir(os.path.join(root, d))]
print files
Output:
['/tmp/project/auth/__init__.py', '/tmp/project/controllers/__init__.py']
Try this code below:
import os
def find_file_name(path=None):
paths = []
if not path:
path = os.getcwd()
for element in os.listdir(path):
full_path = os.path.join(path, element)
if os.path.isdir(full_path):
paths += find_file_name(path=full_path)
else:
paths.append(full_path)
else:
return paths
def find_file_name(path=None, extention=".pdf"):
pdf_files = []
if not path:
path = os.getcwd()
for element in os.listdir(path):
full_path = os.path.join(path, element)
file = full_path.split("/")[-1]
if os.path.isdir(full_path):
pdf_files += find_file_name(path=full_path)
else:
if extention in file:
pdf_files.append(file)
return pdf_files
I'm looking for a quick way to copy the entire directory structure (including sub folders and files), with the following conditions:
Copy file if it does not exist in the destination or source is newer
Allow excluding a list of sub folders i.e. ['temp', '.git']
Allow excluding files by type i.e. ['.txt', '.pyc', '*.zip']
I have seen some of the answers using shutil.copy and copytree but none is doing what I was looking for...
I am hoping this could by done by using one of the standard utilities by providing arguments etc. If not I will write a script to do it...
This is what I ended up writing... it does the job, I was hoping this basic functionality would be provided by one of the standard libraries...
import os, sys, pathlib, shutil
def copy_files_on_tree(srcdir, dstdir, sub_folder_to_include, extensions_to_include):
srcdir = str(pathlib.Path(srcdir)).replace('\\', '/')
dstdir = str(pathlib.Path(dstdir)).replace('\\', '/')
for dirpath, dirs, files in os.walk(pathlib.Path(srcdir)):
this_dir = dirpath.replace('\\', "/")
if os.path.basename(this_dir) in sub_folder_to_include:
dest_dir = this_dir.replace(srcdir, dstdir)
# create folder in the destinatin if it does not exist
pathlib.Path(dest_dir).mkdir(parents=True, exist_ok=True)
for filename in files:
dest_file = os.path.join(dest_dir, os.path.basename(filename))
source_file = os.path.join(this_dir, filename)
if os.path.isfile(source_file) and filename.endswith(extensions_to_include):
# copy file if destination is older by more than a second, or does not exist
if (not os.path.exists(dest_file)) or (os.stat(source_file).st_mtime - os.stat(dest_file).st_mtime > 1):
print (f'Copying {source_file} to {dest_dir}')
shutil.copy2(source_file, dest_dir)
else:
print (f'.....Skipping {source_file} to {dest_dir}')
srcdir = 'c:/temp/a'
dstdir = 'c:/temp/j'
sub_folder_to_include = ('a', 'aa','bb')
extensions_to_include = ('.py', '.png', '.gif', '.txt')
copy_files_on_tree(srcdir, dstdir, sub_folder_to_include, extensions_to_include)
This is the solution:
import os, sys, pathlib, shutil
def copy_files_on_tree(srcdir, dstdir, sub_folder_to_include, extensions_to_include):
srcdir = str(pathlib.Path(srcdir)).replace('\\', '/')
dstdir = str(pathlib.Path(dstdir)).replace('\\', '/')
for dirpath, dirs, files in os.walk(pathlib.Path(srcdir)):
this_dir = dirpath.replace('\\', "/")
if os.path.basename(this_dir) in sub_folder_to_include:
dest_dir = this_dir.replace(srcdir, dstdir)
# create folder in the destinatin if it does not exist
pathlib.Path(dest_dir).mkdir(parents=True, exist_ok=True)
for filename in files:
dest_file = os.path.join(dest_dir, os.path.basename(filename))
source_file = os.path.join(this_dir, filename)
if os.path.isfile(source_file) and filename.endswith(extensions_to_include):
# copy file if destination is older by more than a second, or does not exist
if (not os.path.exists(dest_file)) or (os.stat(source_file).st_mtime - os.stat(dest_file).st_mtime > 1):
print (f'Copying {source_file} to {dest_dir}')
shutil.copy2(source_file, dest_dir)
else:
print (f'.....Skipping {source_file} to {dest_dir}')
srcdir = 'c:/temp/a'
dstdir = 'c:/temp/j'
sub_folder_to_include = ('a', 'aa','bb')
extensions_to_include = ('.py', '.png', '.gif', '.txt')
copy_files_on_tree(srcdir, dstdir, sub_folder_to_include, extensions_to_include)
I want to move img only files to another folder
if folder didn't exist I will create it with original name+ImageOnly
ex.
D:\Test #contain some folder
D:\Test\aaa\img1.jpg
D:\Test\bbb\ccc\img2.jpg
import os
import shutil
def moveImage(srcdirs):
for roots, dirs, files in os.walk(srcdirs):
grand_father = srcdirs #D:\Test
not_need =('.zip','.js','.html','.log','.lst','.txt','.ini')
imgExt = ('.jpg','.png','.gif','.jpeg')
father = os.path.split(roots)[1]+'-ImageOnly'
for file in files:
if file.endswith(imgExt) and not file.endswith(not_need):
path = os.path.join(roots,file)
des= os.path.join(grand_father,father)
if not os.path.exists(des):
createFolder(father)
print("folder created")
shutil.move(path,des)
elif file.endswith(not_need): #remove unnecessary file
n1 = os.path.join(roots,file)
os.remove(n1)
def createFolder(directory):
dirs = ('./%s/'%directory)
try:
if not os.path.exists(dirs):
os.makedirs(dirs)
except OSError:
print ('Error: Creating directory. ' + dirs)
src = r'D:\Test'
moveImage(src)
My code gives me
img1.jpg move to aaa-ImageOnly
but for img2.jpg it moved to ccc-ImageOnly
I want it to move to bbb-ImageOnly
to first subfolder name (I call it right?), not it last subfolder name.
Here you go:
import os
import shutil
FOLDER = r'D:\Test'
EXTENSIONS = ('.jpg', '.png', '.gif', '.jpeg')
def move_images(root):
levels = len(root.split(os.sep))
for path, dirs, files in os.walk(root):
for file in files:
if file.endswith(EXTENSIONS):
src_file = os.path.join(path, file)
dst_dir = os.path.join(root, '{}-ImageOnly'.format(path.split(os.sep)[levels]))
dst_file = os.path.join(dst_dir, file)
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
shutil.move(src_file, dst_file)
move_images(FOLDER)
It produces me:
D:\Test\aaa-ImageOnly\img1.jpg
D:\Test\bbb-ImageOnly\img2.jpg